/*
 * Virtio Network Device
 *
 * Copyright IBM, Corp. 2007
 *
 * Authors:
 *  Anthony Liguori   <aliguori@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 */

#include "qemu/osdep.h"
#include "qemu/atomic.h"
#include "qemu/iov.h"
#include "qemu/log.h"
#include "qemu/main-loop.h"
#include "qemu/module.h"
#include "hw/virtio/virtio.h"
#include "net/net.h"
#include "net/checksum.h"
#include "net/tap.h"
#include "qemu/error-report.h"
#include "qemu/timer.h"
#include "qemu/option.h"
#include "qemu/option_int.h"
#include "qemu/config-file.h"
#include "qobject/qdict.h"
#include "hw/virtio/virtio-net.h"
#include "net/vhost_net.h"
#include "net/announce.h"
#include "hw/virtio/virtio-bus.h"
#include "qapi/error.h"
#include "qapi/qapi-events-net.h"
#include "hw/qdev-properties.h"
#include "qapi/qapi-types-migration.h"
#include "qapi/qapi-events-migration.h"
#include "hw/virtio/virtio-access.h"
#include "migration/misc.h"
#include "standard-headers/linux/ethtool.h"
#include "system/system.h"
#include "system/replay.h"
#include "trace.h"
#include "monitor/qdev.h"
#include "monitor/monitor.h"
#include "hw/pci/pci_device.h"
#include "net_rx_pkt.h"
#include "hw/virtio/vhost.h"
#include "system/qtest.h"

#define VIRTIO_NET_VM_VERSION    11

/* previously fixed value */
#define VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE 256
#define VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE 256

/* for now, only allow larger queue_pairs; with virtio-1, guest can downsize */
#define VIRTIO_NET_RX_QUEUE_MIN_SIZE VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE
#define VIRTIO_NET_TX_QUEUE_MIN_SIZE VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE

#define VIRTIO_NET_IP4_ADDR_SIZE    8   /* ipv4 saddr + daddr */

#define VIRTIO_NET_TCP_FLAG         0x3F
#define VIRTIO_NET_TCP_HDR_LENGTH   0xF000

/* IPv4 max payload, 16 bits in the header */
#define VIRTIO_NET_MAX_IP4_PAYLOAD (65535 - sizeof(struct ip_header))
#define VIRTIO_NET_MAX_TCP_PAYLOAD 65535

/* header length value in ip header without option */
#define VIRTIO_NET_IP4_HEADER_LENGTH 5

#define VIRTIO_NET_IP6_ADDR_SIZE   32   /* ipv6 saddr + daddr */
#define VIRTIO_NET_MAX_IP6_PAYLOAD VIRTIO_NET_MAX_TCP_PAYLOAD

/* Purge coalesced packets timer interval. This value affects the performance
   a lot and should be tuned carefully: '300000' (300us) is the recommended
   value to pass the WHQL test, and '50000' can gain 2x netperf throughput with
   tso/gso/gro 'off'. */
#define VIRTIO_NET_RSC_DEFAULT_INTERVAL 300000

#define VIRTIO_NET_RSS_SUPPORTED_HASHES (VIRTIO_NET_RSS_HASH_TYPE_IPv4 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_TCPv4 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_UDPv4 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_IPv6 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_TCPv6 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_UDPv6 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_IP_EX | \
                                         VIRTIO_NET_RSS_HASH_TYPE_TCP_EX | \
                                         VIRTIO_NET_RSS_HASH_TYPE_UDP_EX)

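/*
 * The device config space is only exposed up to the end of the last field
 * enabled by the negotiated features: feature_sizes maps each feature bit
 * to the config offset it requires (consumed via virtio_get_config_size()).
 */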
static const VirtIOFeature feature_sizes[] = {
    {.flags = 1ULL << VIRTIO_NET_F_MAC,
     .end = endof(struct virtio_net_config, mac)},
    {.flags = 1ULL << VIRTIO_NET_F_STATUS,
     .end = endof(struct virtio_net_config, status)},
    {.flags = 1ULL << VIRTIO_NET_F_MQ,
     .end = endof(struct virtio_net_config, max_virtqueue_pairs)},
    {.flags = 1ULL << VIRTIO_NET_F_MTU,
     .end = endof(struct virtio_net_config, mtu)},
    {.flags = 1ULL << VIRTIO_NET_F_SPEED_DUPLEX,
     .end = endof(struct virtio_net_config, duplex)},
    {.flags = (1ULL << VIRTIO_NET_F_RSS) | (1ULL << VIRTIO_NET_F_HASH_REPORT),
     .end = endof(struct virtio_net_config, supported_hash_types)},
    {}
};

static const VirtIOConfigSizeParams cfg_size_params = {
    .min_size = endof(struct virtio_net_config, mac),
    .max_size = sizeof(struct virtio_net_config),
    .feature_sizes = feature_sizes
};

static VirtIONetQueue *virtio_net_get_subqueue(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);

    return &n->vqs[nc->queue_index];
}

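/*
 * Virtqueues come in rx/tx pairs: virtqueue index 2N is the rx queue and
 * 2N + 1 the tx queue of queue pair N (the control queue, if any, comes
 * last). Hence the divide-by-two mapping below.
 */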
static int vq2q(int queue_index)
{
    return queue_index / 2;
}

static void flush_or_purge_queued_packets(NetClientState *nc)
{
    if (!nc->peer) {
        return;
    }

    qemu_flush_or_purge_queued_packets(nc->peer, true);
    assert(!virtio_net_get_subqueue(nc)->async_tx.elem);
}

/* TODO
 * - we could suppress RX interrupt if we were so inclined.
 */

static void virtio_net_get_config(VirtIODevice *vdev, uint8_t *config)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    struct virtio_net_config netcfg;
    NetClientState *nc = qemu_get_queue(n->nic);
    static const MACAddr zero = { .a = { 0, 0, 0, 0, 0, 0 } };

    int ret = 0;
    memset(&netcfg, 0, sizeof(struct virtio_net_config));
    virtio_stw_p(vdev, &netcfg.status, n->status);
    virtio_stw_p(vdev, &netcfg.max_virtqueue_pairs, n->max_queue_pairs);
    virtio_stw_p(vdev, &netcfg.mtu, n->net_conf.mtu);
    memcpy(netcfg.mac, n->mac, ETH_ALEN);
    virtio_stl_p(vdev, &netcfg.speed, n->net_conf.speed);
    netcfg.duplex = n->net_conf.duplex;
    netcfg.rss_max_key_size = VIRTIO_NET_RSS_MAX_KEY_SIZE;
    virtio_stw_p(vdev, &netcfg.rss_max_indirection_table_length,
                 virtio_host_has_feature(vdev, VIRTIO_NET_F_RSS) ?
                 VIRTIO_NET_RSS_MAX_TABLE_LEN : 1);
    virtio_stl_p(vdev, &netcfg.supported_hash_types,
                 VIRTIO_NET_RSS_SUPPORTED_HASHES);
    memcpy(config, &netcfg, n->config_size);

    /*
     * Is this VDPA? No peer means not VDPA: there's no way to
     * disconnect/reconnect a VDPA peer.
     */
    if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
        ret = vhost_net_get_config(get_vhost_net(nc->peer), (uint8_t *)&netcfg,
                                   n->config_size);
        if (ret == -1) {
            return;
        }

        /*
         * Some NIC/kernel combinations present 0 as the mac address. As that
         * is not a legal address, try to proceed with the address from the
         * QEMU command line in the hope that the address has been configured
         * correctly elsewhere - just not reported by the device.
         */
        if (memcmp(&netcfg.mac, &zero, sizeof(zero)) == 0) {
            info_report("Zero hardware mac address detected. Ignoring.");
            memcpy(netcfg.mac, n->mac, ETH_ALEN);
        }

        netcfg.status |= virtio_tswap16(vdev,
                                        n->status & VIRTIO_NET_S_ANNOUNCE);
        memcpy(config, &netcfg, n->config_size);
    }
}

static void virtio_net_set_config(VirtIODevice *vdev, const uint8_t *config)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    struct virtio_net_config netcfg = {};
    NetClientState *nc = qemu_get_queue(n->nic);

    memcpy(&netcfg, config, n->config_size);

    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR) &&
        !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1) &&
        memcmp(netcfg.mac, n->mac, ETH_ALEN)) {
        memcpy(n->mac, netcfg.mac, ETH_ALEN);
        qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
    }

    /*
     * Is this VDPA? No peer means not VDPA: there's no way to
     * disconnect/reconnect a VDPA peer.
     */
    if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
        vhost_net_set_config(get_vhost_net(nc->peer),
                             (uint8_t *)&netcfg, 0, n->config_size,
                             VHOST_SET_CONFIG_TYPE_FRONTEND);
    }
}

static bool virtio_net_started(VirtIONet *n, uint8_t status)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    return (status & VIRTIO_CONFIG_S_DRIVER_OK) &&
        (n->status & VIRTIO_NET_S_LINK_UP) && vdev->vm_running;
}

static void virtio_net_announce_notify(VirtIONet *net)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(net);
    trace_virtio_net_announce_notify();

    net->status |= VIRTIO_NET_S_ANNOUNCE;
    virtio_notify_config(vdev);
}

static void virtio_net_announce_timer(void *opaque)
{
    VirtIONet *n = opaque;
    trace_virtio_net_announce_timer(n->announce_timer.round);

    n->announce_timer.round--;
    virtio_net_announce_notify(n);
}

static void virtio_net_announce(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);

    /*
     * Make sure the virtio migration announcement timer isn't running.
     * If it is, let it trigger announcement so that we do not cause
     * confusion.
     */
    if (n->announce_timer.round) {
        return;
    }

    if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) &&
        virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
        virtio_net_announce_notify(n);
    }
}

static void virtio_net_vhost_status(VirtIONet *n, uint8_t status)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    NetClientState *nc = qemu_get_queue(n->nic);
    int queue_pairs = n->multiqueue ? n->max_queue_pairs : 1;
    int cvq = virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ) ?
              n->max_ncs - n->max_queue_pairs : 0;

    if (!get_vhost_net(nc->peer)) {
        return;
    }

    if ((virtio_net_started(n, status) && !nc->peer->link_down) ==
        !!n->vhost_started) {
        return;
    }
    if (!n->vhost_started) {
        int r, i;

        if (n->needs_vnet_hdr_swap) {
            error_report("backend does not support %s vnet headers; "
                         "falling back on userspace virtio",
                         virtio_is_big_endian(vdev) ? "BE" : "LE");
            return;
        }

        /* Any packets outstanding? Purge them to avoid touching rings
         * when vhost is running.
         */
        for (i = 0; i < queue_pairs; i++) {
            NetClientState *qnc = qemu_get_subqueue(n->nic, i);

            /* Purge both directions: TX and RX. */
            qemu_net_queue_purge(qnc->peer->incoming_queue, qnc);
            qemu_net_queue_purge(qnc->incoming_queue, qnc->peer);
        }

        if (virtio_has_feature(vdev->guest_features, VIRTIO_NET_F_MTU)) {
            r = vhost_net_set_mtu(get_vhost_net(nc->peer), n->net_conf.mtu);
            if (r < 0) {
                error_report("%uBytes MTU not supported by the backend",
                             n->net_conf.mtu);

                return;
            }
        }

        n->vhost_started = 1;
        r = vhost_net_start(vdev, n->nic->ncs, queue_pairs, cvq);
        if (r < 0) {
            error_report("unable to start vhost net: %d: "
                         "falling back on userspace virtio", -r);
            n->vhost_started = 0;
        }
    } else {
        vhost_net_stop(vdev, n->nic->ncs, queue_pairs, cvq);
        n->vhost_started = 0;
    }
}

static int virtio_net_set_vnet_endian_one(VirtIODevice *vdev,
                                          NetClientState *peer,
                                          bool enable)
{
    if (virtio_is_big_endian(vdev)) {
        return qemu_set_vnet_be(peer, enable);
    } else {
        return qemu_set_vnet_le(peer, enable);
    }
}

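/*
 * Returns true if the backends could not be switched to the requested
 * endianness, i.e. the virtio-net core has to swap vnet headers itself.
 * On failure while enabling, any queues already switched are rolled back.
 */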
static bool virtio_net_set_vnet_endian(VirtIODevice *vdev, NetClientState *ncs,
                                       int queue_pairs, bool enable)
{
    int i;

    for (i = 0; i < queue_pairs; i++) {
        if (virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, enable) < 0 &&
            enable) {
            while (--i >= 0) {
                virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, false);
            }

            return true;
        }
    }

    return false;
}

static void virtio_net_vnet_endian_status(VirtIONet *n, uint8_t status)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    int queue_pairs = n->multiqueue ? n->max_queue_pairs : 1;

    if (virtio_net_started(n, status)) {
        /* Before using the device, we tell the network backend about the
         * endianness to use when parsing vnet headers. If the backend
         * can't do it, we fall back to fixing the headers in the core
         * virtio-net code.
         */
        n->needs_vnet_hdr_swap = n->has_vnet_hdr &&
                                 virtio_net_set_vnet_endian(vdev, n->nic->ncs,
                                                            queue_pairs, true);
    } else if (virtio_net_started(n, vdev->status)) {
        /* After using the device, we need to reset the network backend to
         * the default (guest native endianness), otherwise the guest may
         * lose network connectivity if it is rebooted into a different
         * endianness.
         */
        virtio_net_set_vnet_endian(vdev, n->nic->ncs, queue_pairs, false);
    }
}

static void virtio_net_drop_tx_queue_data(VirtIODevice *vdev, VirtQueue *vq)
{
    unsigned int dropped = virtqueue_drop_all(vq);
    if (dropped) {
        virtio_notify(vdev, vq);
    }
}

static int virtio_net_set_status(struct VirtIODevice *vdev, uint8_t status)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    VirtIONetQueue *q;
    int i;
    uint8_t queue_status;

    virtio_net_vnet_endian_status(n, status);
    virtio_net_vhost_status(n, status);

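    /*
     * Queues beyond curr_queue_pairs (or beyond the first pair when
     * multiqueue is off) are treated as stopped: they get a zeroed status
     * so their timers and bottom halves are cancelled below.
     */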
    for (i = 0; i < n->max_queue_pairs; i++) {
        NetClientState *ncs = qemu_get_subqueue(n->nic, i);
        bool queue_started;
        q = &n->vqs[i];

        if ((!n->multiqueue && i != 0) || i >= n->curr_queue_pairs) {
            queue_status = 0;
        } else {
            queue_status = status;
        }
        queue_started =
            virtio_net_started(n, queue_status) && !n->vhost_started;

        if (queue_started) {
            qemu_flush_queued_packets(ncs);
        }

        if (!q->tx_waiting) {
            continue;
        }

        if (queue_started) {
            if (q->tx_timer) {
                timer_mod(q->tx_timer,
                          qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
            } else {
                replay_bh_schedule_event(q->tx_bh);
            }
        } else {
            if (q->tx_timer) {
                timer_del(q->tx_timer);
            } else {
                qemu_bh_cancel(q->tx_bh);
            }
            if ((n->status & VIRTIO_NET_S_LINK_UP) == 0 &&
                (queue_status & VIRTIO_CONFIG_S_DRIVER_OK) &&
                vdev->vm_running) {
                /* If tx is waiting, we likely have some packets in the
                 * tx queue and notification disabled. */
                q->tx_waiting = 0;
                virtio_queue_set_notification(q->tx_vq, 1);
                virtio_net_drop_tx_queue_data(vdev, q->tx_vq);
            }
        }
    }
    return 0;
}

static void virtio_net_set_link_status(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint16_t old_status = n->status;

    if (nc->link_down) {
        n->status &= ~VIRTIO_NET_S_LINK_UP;
    } else {
        n->status |= VIRTIO_NET_S_LINK_UP;
    }

    if (n->status != old_status) {
        virtio_notify_config(vdev);
    }

    virtio_net_set_status(vdev, vdev->status);
}

static void rxfilter_notify(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);

    if (nc->rxfilter_notify_enabled) {
        char *path = object_get_canonical_path(OBJECT(n->qdev));
        qapi_event_send_nic_rx_filter_changed(n->netclient_name, path);
        g_free(path);

        /* disable event notification to avoid events flooding */
        nc->rxfilter_notify_enabled = 0;
    }
}

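/*
 * n->vlans is a bitmap of 32-bit words: VLAN id 'vid' lives at bit
 * (vid & 0x1f) of word (vid >> 5), so (i << 5) + j below recovers the id.
 */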
static intList *get_vlan_table(VirtIONet *n)
{
    intList *list;
    int i, j;

    list = NULL;
    for (i = 0; i < MAX_VLAN >> 5; i++) {
        for (j = 0; n->vlans[i] && j <= 0x1f; j++) {
            if (n->vlans[i] & (1U << j)) {
                QAPI_LIST_PREPEND(list, (i << 5) + j);
            }
        }
    }

    return list;
}

static RxFilterInfo *virtio_net_query_rxfilter(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    RxFilterInfo *info;
    strList *str_list;
    int i;

    info = g_malloc0(sizeof(*info));
    info->name = g_strdup(nc->name);
    info->promiscuous = n->promisc;

    if (n->nouni) {
        info->unicast = RX_STATE_NONE;
    } else if (n->alluni) {
        info->unicast = RX_STATE_ALL;
    } else {
        info->unicast = RX_STATE_NORMAL;
    }

    if (n->nomulti) {
        info->multicast = RX_STATE_NONE;
    } else if (n->allmulti) {
        info->multicast = RX_STATE_ALL;
    } else {
        info->multicast = RX_STATE_NORMAL;
    }

    info->broadcast_allowed = n->nobcast;
    info->multicast_overflow = n->mac_table.multi_overflow;
    info->unicast_overflow = n->mac_table.uni_overflow;

    info->main_mac = qemu_mac_strdup_printf(n->mac);

    str_list = NULL;
    for (i = 0; i < n->mac_table.first_multi; i++) {
        QAPI_LIST_PREPEND(str_list,
                          qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN));
    }
    info->unicast_table = str_list;

    str_list = NULL;
    for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
        QAPI_LIST_PREPEND(str_list,
                          qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN));
    }
    info->multicast_table = str_list;
    info->vlan_table = get_vlan_table(n);

    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VLAN)) {
        info->vlan = RX_STATE_ALL;
    } else if (!info->vlan_table) {
        info->vlan = RX_STATE_NONE;
    } else {
        info->vlan = RX_STATE_NORMAL;
    }

    /* enable event notification after query */
    nc->rxfilter_notify_enabled = 1;

    return info;
}

static void virtio_net_queue_reset(VirtIODevice *vdev, uint32_t queue_index)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    NetClientState *nc;

    /* validate queue_index and skip for cvq */
    if (queue_index >= n->max_queue_pairs * 2) {
        return;
    }

    nc = qemu_get_subqueue(n->nic, vq2q(queue_index));

    if (!nc->peer) {
        return;
    }

    if (get_vhost_net(nc->peer) &&
        nc->peer->info->type == NET_CLIENT_DRIVER_TAP) {
        vhost_net_virtqueue_reset(vdev, nc, queue_index);
    }

    flush_or_purge_queued_packets(nc);
}

static void virtio_net_queue_enable(VirtIODevice *vdev, uint32_t queue_index)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    NetClientState *nc;
    int r;

    /* validate queue_index and skip for cvq */
    if (queue_index >= n->max_queue_pairs * 2) {
        return;
    }

    nc = qemu_get_subqueue(n->nic, vq2q(queue_index));

    if (!nc->peer || !vdev->vhost_started) {
        return;
    }

    if (get_vhost_net(nc->peer) &&
        nc->peer->info->type == NET_CLIENT_DRIVER_TAP) {
        r = vhost_net_virtqueue_restart(vdev, nc, queue_index);
        if (r < 0) {
            error_report("unable to restart vhost net virtqueue: %d, "
                         "when resetting the queue", queue_index);
        }
    }
}

static void peer_test_vnet_hdr(VirtIONet *n)
{
    NetClientState *nc = qemu_get_queue(n->nic);
    if (!nc->peer) {
        return;
    }

    n->has_vnet_hdr = qemu_has_vnet_hdr(nc->peer);
}

static int peer_has_vnet_hdr(VirtIONet *n)
{
    return n->has_vnet_hdr;
}

static int peer_has_ufo(VirtIONet *n)
{
    if (!peer_has_vnet_hdr(n)) {
        return 0;
    }

    n->has_ufo = qemu_has_ufo(qemu_get_queue(n->nic)->peer);

    return n->has_ufo;
}

static int peer_has_uso(VirtIONet *n)
{
    if (!peer_has_vnet_hdr(n)) {
        return 0;
    }

    return qemu_has_uso(qemu_get_queue(n->nic)->peer);
}

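/*
 * Pick the guest-visible header layout: with VIRTIO_F_VERSION_1 the
 * mergeable-rxbuf layout is always used (extended with a hash field when
 * hash reports were negotiated); legacy devices only use it when
 * VIRTIO_NET_F_MRG_RXBUF was negotiated.
 */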
static void virtio_net_set_mrg_rx_bufs(VirtIONet *n, int mergeable_rx_bufs,
                                       int version_1, int hash_report)
{
    int i;
    NetClientState *nc;

    n->mergeable_rx_bufs = mergeable_rx_bufs;

    if (version_1) {
        n->guest_hdr_len = hash_report ?
            sizeof(struct virtio_net_hdr_v1_hash) :
            sizeof(struct virtio_net_hdr_mrg_rxbuf);
        n->rss_data.populate_hash = !!hash_report;
    } else {
        n->guest_hdr_len = n->mergeable_rx_bufs ?
            sizeof(struct virtio_net_hdr_mrg_rxbuf) :
            sizeof(struct virtio_net_hdr);
        n->rss_data.populate_hash = false;
    }

    for (i = 0; i < n->max_queue_pairs; i++) {
        nc = qemu_get_subqueue(n->nic, i);

        if (peer_has_vnet_hdr(n) &&
            qemu_has_vnet_hdr_len(nc->peer, n->guest_hdr_len)) {
            qemu_set_vnet_hdr_len(nc->peer, n->guest_hdr_len);
            n->host_hdr_len = n->guest_hdr_len;
        }
    }
}

static int virtio_net_max_tx_queue_size(VirtIONet *n)
{
    NetClientState *peer = n->nic_conf.peers.ncs[0];

    /*
     * Backends other than vhost-user or vhost-vdpa don't support max queue
     * size.
     */
    if (!peer) {
        return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
    }

    switch (peer->info->type) {
    case NET_CLIENT_DRIVER_VHOST_USER:
    case NET_CLIENT_DRIVER_VHOST_VDPA:
        return VIRTQUEUE_MAX_SIZE;
    default:
        return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
    }
}

static int peer_attach(VirtIONet *n, int index)
{
    NetClientState *nc = qemu_get_subqueue(n->nic, index);

    if (!nc->peer) {
        return 0;
    }

    if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
        vhost_set_vring_enable(nc->peer, 1);
    }

    if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) {
        return 0;
    }

    if (n->max_queue_pairs == 1) {
        return 0;
    }

    return tap_enable(nc->peer);
}

static int peer_detach(VirtIONet *n, int index)
{
    NetClientState *nc = qemu_get_subqueue(n->nic, index);

    if (!nc->peer) {
        return 0;
    }

    if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
        vhost_set_vring_enable(nc->peer, 0);
    }

    if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) {
        return 0;
    }

    return tap_disable(nc->peer);
}

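/*
 * Enable the peers backing the first curr_queue_pairs pairs and disable
 * the rest (for tap this toggles the underlying queue fds; for vhost-user
 * it toggles the vrings).
 */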
static void virtio_net_set_queue_pairs(VirtIONet *n)
{
    int i;
    int r;

    if (n->nic->peer_deleted) {
        return;
    }

    for (i = 0; i < n->max_queue_pairs; i++) {
        if (i < n->curr_queue_pairs) {
            r = peer_attach(n, i);
            assert(!r);
        } else {
            r = peer_detach(n, i);
            assert(!r);
        }
    }
}

static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue);

static uint64_t virtio_net_get_features(VirtIODevice *vdev, uint64_t features,
                                        Error **errp)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    NetClientState *nc = qemu_get_queue(n->nic);

    /* First, sync all possible virtio-net supported features */
    features |= n->host_features;

    virtio_add_feature(&features, VIRTIO_NET_F_MAC);

    if (!peer_has_vnet_hdr(n)) {
        virtio_clear_feature(&features, VIRTIO_NET_F_CSUM);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO4);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO6);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_ECN);

        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_CSUM);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO4);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO6);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_ECN);

        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_USO);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_USO4);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_USO6);

        virtio_clear_feature(&features, VIRTIO_NET_F_HASH_REPORT);
    }

    if (!peer_has_vnet_hdr(n) || !peer_has_ufo(n)) {
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_UFO);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_UFO);
    }

    if (!peer_has_uso(n)) {
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_USO);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_USO4);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_USO6);
    }

    if (!get_vhost_net(nc->peer)) {
        return features;
    }

    if (!ebpf_rss_is_loaded(&n->ebpf_rss)) {
        virtio_clear_feature(&features, VIRTIO_NET_F_RSS);
    }
    features = vhost_net_get_features(get_vhost_net(nc->peer), features);
    vdev->backend_features = features;

    if (n->mtu_bypass_backend &&
        (n->host_features & 1ULL << VIRTIO_NET_F_MTU)) {
        features |= (1ULL << VIRTIO_NET_F_MTU);
    }

    /*
     * Since GUEST_ANNOUNCE is emulated, the feature bit could be set without
     * being enabled. This happens in the vDPA case.
     *
     * Make sure the feature set is not incoherent, as the driver could
     * refuse to start otherwise.
     *
     * TODO: QEMU is able to emulate a CVQ just for guest_announce purposes,
     * helping the guest to notify its new location with vDPA devices that do
     * not support it.
     */
    if (!virtio_has_feature(vdev->backend_features, VIRTIO_NET_F_CTRL_VQ)) {
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_ANNOUNCE);
    }

    return features;
}

static uint64_t virtio_net_bad_features(VirtIODevice *vdev)
{
    uint64_t features = 0;

    /* Linux kernel 2.6.25.  It understood MAC (as everyone must),
     * but also these: */
    virtio_add_feature(&features, VIRTIO_NET_F_MAC);
    virtio_add_feature(&features, VIRTIO_NET_F_CSUM);
    virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO4);
    virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO6);
    virtio_add_feature(&features, VIRTIO_NET_F_HOST_ECN);

    return features;
}

static void virtio_net_apply_guest_offloads(VirtIONet *n)
{
    qemu_set_offload(qemu_get_queue(n->nic)->peer,
                     !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_CSUM)),
                     !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO4)),
                     !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO6)),
                     !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_ECN)),
                     !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_UFO)),
                     !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_USO4)),
                     !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_USO6)));
}

static uint64_t virtio_net_guest_offloads_by_features(uint64_t features)
{
    static const uint64_t guest_offloads_mask =
        (1ULL << VIRTIO_NET_F_GUEST_CSUM) |
        (1ULL << VIRTIO_NET_F_GUEST_TSO4) |
        (1ULL << VIRTIO_NET_F_GUEST_TSO6) |
        (1ULL << VIRTIO_NET_F_GUEST_ECN)  |
        (1ULL << VIRTIO_NET_F_GUEST_UFO)  |
        (1ULL << VIRTIO_NET_F_GUEST_USO4) |
        (1ULL << VIRTIO_NET_F_GUEST_USO6);

    return guest_offloads_mask & features;
}

uint64_t virtio_net_supported_guest_offloads(const VirtIONet *n)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    return virtio_net_guest_offloads_by_features(vdev->guest_features);
}

typedef struct {
    VirtIONet *n;
    DeviceState *dev;
} FailoverDevice;

/**
 * Set the failover primary device
 *
 * @dev: the device being walked over
 * @opaque: the FailoverDevice to fill in
 */
static int failover_set_primary(DeviceState *dev, void *opaque)
{
    FailoverDevice *fdev = opaque;
    PCIDevice *pci_dev = (PCIDevice *)
        object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE);

    if (!pci_dev) {
        return 0;
    }

    if (!g_strcmp0(pci_dev->failover_pair_id, fdev->n->netclient_name)) {
        fdev->dev = dev;
        return 1;
    }

    return 0;
}

/**
 * Find the primary device for this failover virtio-net
 *
 * @n: VirtIONet device
 */
static DeviceState *failover_find_primary_device(VirtIONet *n)
{
    FailoverDevice fdev = {
        .n = n,
    };

    qbus_walk_children(sysbus_get_default(), failover_set_primary, NULL,
                       NULL, NULL, &fdev);
    return fdev.dev;
}

static void failover_add_primary(VirtIONet *n, Error **errp)
{
    Error *err = NULL;
    DeviceState *dev = failover_find_primary_device(n);

    if (dev) {
        return;
    }

    if (!n->primary_opts) {
        error_setg(errp, "Primary device not found");
        error_append_hint(errp, "Virtio-net failover will not work. Make "
                          "sure primary device has parameter"
                          " failover_pair_id=%s\n", n->netclient_name);
        return;
    }

    dev = qdev_device_add_from_qdict(n->primary_opts,
                                     n->primary_opts_from_json,
                                     &err);
    if (err) {
        qobject_unref(n->primary_opts);
        n->primary_opts = NULL;
    } else {
        object_unref(OBJECT(dev));
    }
    error_propagate(errp, err);
}

static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    Error *err = NULL;
    int i;

    if (n->mtu_bypass_backend &&
        !virtio_has_feature(vdev->backend_features, VIRTIO_NET_F_MTU)) {
        features &= ~(1ULL << VIRTIO_NET_F_MTU);
    }

    virtio_net_set_multiqueue(n,
                              virtio_has_feature(features, VIRTIO_NET_F_RSS) ||
                              virtio_has_feature(features, VIRTIO_NET_F_MQ));

    virtio_net_set_mrg_rx_bufs(n,
                               virtio_has_feature(features,
                                                  VIRTIO_NET_F_MRG_RXBUF),
                               virtio_has_feature(features,
                                                  VIRTIO_F_VERSION_1),
                               virtio_has_feature(features,
                                                  VIRTIO_NET_F_HASH_REPORT));

    n->rsc4_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
        virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO4);
    n->rsc6_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
        virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO6);
    n->rss_data.redirect = virtio_has_feature(features, VIRTIO_NET_F_RSS);

    if (n->has_vnet_hdr) {
        n->curr_guest_offloads =
            virtio_net_guest_offloads_by_features(features);
        virtio_net_apply_guest_offloads(n);
    }

    for (i = 0; i < n->max_queue_pairs; i++) {
        NetClientState *nc = qemu_get_subqueue(n->nic, i);

        if (!get_vhost_net(nc->peer)) {
            continue;
        }
        vhost_net_ack_features(get_vhost_net(nc->peer), features);

        /*
         * Keep acked_features in NetVhostUserState up-to-date so it
         * doesn't miss any features configured by the guest virtio driver.
         */
        vhost_net_save_acked_features(nc->peer);
    }

    if (!virtio_has_feature(features, VIRTIO_NET_F_CTRL_VLAN)) {
        memset(n->vlans, 0xff, MAX_VLAN >> 3);
    }

    if (virtio_has_feature(features, VIRTIO_NET_F_STANDBY)) {
        qapi_event_send_failover_negotiated(n->netclient_name);
        qatomic_set(&n->failover_primary_hidden, false);
        failover_add_primary(n, &err);
        if (err) {
            if (!qtest_enabled()) {
                warn_report_err(err);
            } else {
                error_free(err);
            }
        }
    }
}

static int virtio_net_handle_rx_mode(VirtIONet *n, uint8_t cmd,
                                     struct iovec *iov, unsigned int iov_cnt)
{
    uint8_t on;
    size_t s;
    NetClientState *nc = qemu_get_queue(n->nic);

    s = iov_to_buf(iov, iov_cnt, 0, &on, sizeof(on));
    if (s != sizeof(on)) {
        return VIRTIO_NET_ERR;
    }

    if (cmd == VIRTIO_NET_CTRL_RX_PROMISC) {
        n->promisc = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_ALLMULTI) {
        n->allmulti = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_ALLUNI) {
        n->alluni = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_NOMULTI) {
        n->nomulti = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_NOUNI) {
        n->nouni = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_NOBCAST) {
        n->nobcast = on;
    } else {
        return VIRTIO_NET_ERR;
    }

    rxfilter_notify(nc);

    return VIRTIO_NET_OK;
}

static int virtio_net_handle_offloads(VirtIONet *n, uint8_t cmd,
                                      struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint64_t offloads;
    size_t s;

    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
        return VIRTIO_NET_ERR;
    }

    s = iov_to_buf(iov, iov_cnt, 0, &offloads, sizeof(offloads));
    if (s != sizeof(offloads)) {
        return VIRTIO_NET_ERR;
    }

    if (cmd == VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET) {
        uint64_t supported_offloads;

        offloads = virtio_ldq_p(vdev, &offloads);

        if (!n->has_vnet_hdr) {
            return VIRTIO_NET_ERR;
        }

        n->rsc4_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
            virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO4);
        n->rsc6_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
            virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO6);
        virtio_clear_feature(&offloads, VIRTIO_NET_F_RSC_EXT);

        supported_offloads = virtio_net_supported_guest_offloads(n);
        if (offloads & ~supported_offloads) {
            return VIRTIO_NET_ERR;
        }

        n->curr_guest_offloads = offloads;
        virtio_net_apply_guest_offloads(n);

        return VIRTIO_NET_OK;
    } else {
        return VIRTIO_NET_ERR;
    }
}

static int virtio_net_handle_mac(VirtIONet *n, uint8_t cmd,
                                 struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    struct virtio_net_ctrl_mac mac_data;
    size_t s;
    NetClientState *nc = qemu_get_queue(n->nic);

    if (cmd == VIRTIO_NET_CTRL_MAC_ADDR_SET) {
        if (iov_size(iov, iov_cnt) != sizeof(n->mac)) {
            return VIRTIO_NET_ERR;
        }
        s = iov_to_buf(iov, iov_cnt, 0, &n->mac, sizeof(n->mac));
        assert(s == sizeof(n->mac));
        qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
        rxfilter_notify(nc);

        return VIRTIO_NET_OK;
    }

    if (cmd != VIRTIO_NET_CTRL_MAC_TABLE_SET) {
        return VIRTIO_NET_ERR;
    }

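    /*
     * VIRTIO_NET_CTRL_MAC_TABLE_SET carries two virtio_net_ctrl_mac tables
     * back to back - unicast first, then multicast - each a 32-bit entry
     * count followed by that many 6-byte MAC addresses.
     */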
    int in_use = 0;
    int first_multi = 0;
    uint8_t uni_overflow = 0;
    uint8_t multi_overflow = 0;
    uint8_t *macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);

    s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
                   sizeof(mac_data.entries));
    mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
    if (s != sizeof(mac_data.entries)) {
        goto error;
    }
    iov_discard_front(&iov, &iov_cnt, s);

    if (mac_data.entries * ETH_ALEN > iov_size(iov, iov_cnt)) {
        goto error;
    }

    if (mac_data.entries <= MAC_TABLE_ENTRIES) {
        s = iov_to_buf(iov, iov_cnt, 0, macs,
                       mac_data.entries * ETH_ALEN);
        if (s != mac_data.entries * ETH_ALEN) {
            goto error;
        }
        in_use += mac_data.entries;
    } else {
        uni_overflow = 1;
    }

    iov_discard_front(&iov, &iov_cnt, mac_data.entries * ETH_ALEN);

    first_multi = in_use;

    s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
                   sizeof(mac_data.entries));
    mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
    if (s != sizeof(mac_data.entries)) {
        goto error;
    }

    iov_discard_front(&iov, &iov_cnt, s);

    if (mac_data.entries * ETH_ALEN != iov_size(iov, iov_cnt)) {
        goto error;
    }

    if (mac_data.entries <= MAC_TABLE_ENTRIES - in_use) {
        s = iov_to_buf(iov, iov_cnt, 0, &macs[in_use * ETH_ALEN],
                       mac_data.entries * ETH_ALEN);
        if (s != mac_data.entries * ETH_ALEN) {
            goto error;
        }
        in_use += mac_data.entries;
    } else {
        multi_overflow = 1;
    }

    n->mac_table.in_use = in_use;
    n->mac_table.first_multi = first_multi;
    n->mac_table.uni_overflow = uni_overflow;
    n->mac_table.multi_overflow = multi_overflow;
    memcpy(n->mac_table.macs, macs, MAC_TABLE_ENTRIES * ETH_ALEN);
    g_free(macs);
    rxfilter_notify(nc);

    return VIRTIO_NET_OK;

error:
    g_free(macs);
    return VIRTIO_NET_ERR;
}

static int virtio_net_handle_vlan_table(VirtIONet *n, uint8_t cmd,
                                        struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint16_t vid;
    size_t s;
    NetClientState *nc = qemu_get_queue(n->nic);

    s = iov_to_buf(iov, iov_cnt, 0, &vid, sizeof(vid));
    vid = virtio_lduw_p(vdev, &vid);
    if (s != sizeof(vid)) {
        return VIRTIO_NET_ERR;
    }

    if (vid >= MAX_VLAN) {
        return VIRTIO_NET_ERR;
    }

    if (cmd == VIRTIO_NET_CTRL_VLAN_ADD) {
        n->vlans[vid >> 5] |= (1U << (vid & 0x1f));
    } else if (cmd == VIRTIO_NET_CTRL_VLAN_DEL) {
        n->vlans[vid >> 5] &= ~(1U << (vid & 0x1f));
    } else {
        return VIRTIO_NET_ERR;
    }

    rxfilter_notify(nc);

    return VIRTIO_NET_OK;
}

static int virtio_net_handle_announce(VirtIONet *n, uint8_t cmd,
                                      struct iovec *iov, unsigned int iov_cnt)
{
    trace_virtio_net_handle_announce(n->announce_timer.round);
    if (cmd == VIRTIO_NET_CTRL_ANNOUNCE_ACK &&
        n->status & VIRTIO_NET_S_ANNOUNCE) {
        n->status &= ~VIRTIO_NET_S_ANNOUNCE;
        if (n->announce_timer.round) {
            qemu_announce_timer_step(&n->announce_timer);
        }
        return VIRTIO_NET_OK;
    } else {
        return VIRTIO_NET_ERR;
    }
}

static bool virtio_net_attach_ebpf_to_backend(NICState *nic, int prog_fd)
{
    NetClientState *nc = qemu_get_peer(qemu_get_queue(nic), 0);
    if (nc == NULL || nc->info->set_steering_ebpf == NULL) {
        return false;
    }

    trace_virtio_net_rss_attach_ebpf(nic, prog_fd);
    return nc->info->set_steering_ebpf(nc, prog_fd);
}

static void rss_data_to_rss_config(struct VirtioNetRssData *data,
                                   struct EBPFRSSConfig *config)
{
    config->redirect = data->redirect;
    config->populate_hash = data->populate_hash;
    config->hash_types = data->hash_types;
    config->indirections_len = data->indirections_len;
    config->default_queue = data->default_queue;
}

static bool virtio_net_attach_ebpf_rss(VirtIONet *n)
{
    struct EBPFRSSConfig config = {};

    if (!ebpf_rss_is_loaded(&n->ebpf_rss)) {
        return false;
    }

    rss_data_to_rss_config(&n->rss_data, &config);

    if (!ebpf_rss_set_all(&n->ebpf_rss, &config,
                          n->rss_data.indirections_table, n->rss_data.key,
                          NULL)) {
        return false;
    }

    if (!virtio_net_attach_ebpf_to_backend(n->nic, n->ebpf_rss.program_fd)) {
        return false;
    }

    return true;
}

static void virtio_net_detach_ebpf_rss(VirtIONet *n)
{
    virtio_net_attach_ebpf_to_backend(n->nic, -1);
}

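/*
 * Apply the current rss_data: prefer eBPF steering in the backend, fall
 * back to software RSS in QEMU when the program can't be attached, and
 * force the software path whenever hash population is requested.
 */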
static void virtio_net_commit_rss_config(VirtIONet *n)
{
    if (n->rss_data.enabled) {
        n->rss_data.enabled_software_rss = n->rss_data.populate_hash;
        if (n->rss_data.populate_hash) {
            virtio_net_detach_ebpf_rss(n);
        } else if (!virtio_net_attach_ebpf_rss(n)) {
            if (get_vhost_net(qemu_get_queue(n->nic)->peer)) {
                warn_report("Can't load eBPF RSS for vhost");
            } else {
                warn_report("Can't load eBPF RSS - fallback to software RSS");
                n->rss_data.enabled_software_rss = true;
            }
        }

        trace_virtio_net_rss_enable(n,
                                    n->rss_data.hash_types,
                                    n->rss_data.indirections_len,
                                    sizeof(n->rss_data.key));
    } else {
        virtio_net_detach_ebpf_rss(n);
        trace_virtio_net_rss_disable(n);
    }
}

static void virtio_net_disable_rss(VirtIONet *n)
{
    if (!n->rss_data.enabled) {
        return;
    }

    n->rss_data.enabled = false;
    virtio_net_commit_rss_config(n);
}

static bool virtio_net_load_ebpf_fds(VirtIONet *n, Error **errp)
{
    int fds[EBPF_RSS_MAX_FDS] = { [0 ... EBPF_RSS_MAX_FDS - 1] = -1};
    int ret = true;
    int i = 0;

    if (n->nr_ebpf_rss_fds != EBPF_RSS_MAX_FDS) {
        error_setg(errp, "Expected %d file descriptors but got %d",
                   EBPF_RSS_MAX_FDS, n->nr_ebpf_rss_fds);
        return false;
    }

    for (i = 0; i < n->nr_ebpf_rss_fds; i++) {
        fds[i] = monitor_fd_param(monitor_cur(), n->ebpf_rss_fds[i], errp);
        if (fds[i] < 0) {
            ret = false;
            goto exit;
        }
    }

    ret = ebpf_rss_load_fds(&n->ebpf_rss, fds[0], fds[1], fds[2], fds[3], errp);

exit:
    if (!ret) {
        for (i = 0; i < n->nr_ebpf_rss_fds && fds[i] != -1; i++) {
            close(fds[i]);
        }
    }

    return ret;
}

static bool virtio_net_load_ebpf(VirtIONet *n, Error **errp)
{
    if (!virtio_net_attach_ebpf_to_backend(n->nic, -1)) {
        return true;
    }

    trace_virtio_net_rss_load(n, n->nr_ebpf_rss_fds, n->ebpf_rss_fds);

    /*
     * If user explicitly gave QEMU RSS FDs to use, then
     * failing to use them must be considered a fatal
     * error. If no RSS FDs were provided, QEMU is trying
     * eBPF on a "best effort" basis only, so report a
     * warning and allow fallback to software RSS.
     */
    if (n->ebpf_rss_fds) {
        return virtio_net_load_ebpf_fds(n, errp);
    }

    ebpf_rss_load(&n->ebpf_rss, &error_warn);
    return true;
}

static void virtio_net_unload_ebpf(VirtIONet *n)
{
    virtio_net_attach_ebpf_to_backend(n->nic, -1);
    ebpf_rss_unload(&n->ebpf_rss);
}

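/*
 * Parse a VIRTIO_NET_CTRL_MQ_RSS_CONFIG (do_rss) or _HASH_CONFIG command:
 * a virtio_net_rss_config header, then (indirection_table_mask + 1)
 * 16-bit indirection entries, then max_tx_vq, the key length and the key.
 */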
static uint16_t virtio_net_handle_rss(VirtIONet *n,
                                      struct iovec *iov,
                                      unsigned int iov_cnt,
                                      bool do_rss)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    struct virtio_net_rss_config cfg;
    size_t s, offset = 0, size_get;
    uint16_t queue_pairs, i;
    struct {
        uint16_t us;
        uint8_t b;
    } QEMU_PACKED temp;
    const char *err_msg = "";
    uint32_t err_value = 0;

    if (do_rss && !virtio_vdev_has_feature(vdev, VIRTIO_NET_F_RSS)) {
        err_msg = "RSS is not negotiated";
        goto error;
    }
    if (!do_rss && !virtio_vdev_has_feature(vdev, VIRTIO_NET_F_HASH_REPORT)) {
        err_msg = "Hash report is not negotiated";
        goto error;
    }
    size_get = offsetof(struct virtio_net_rss_config, indirection_table);
    s = iov_to_buf(iov, iov_cnt, offset, &cfg, size_get);
    if (s != size_get) {
        err_msg = "Short command buffer";
        err_value = (uint32_t)s;
        goto error;
    }
    n->rss_data.hash_types = virtio_ldl_p(vdev, &cfg.hash_types);
    n->rss_data.indirections_len =
        virtio_lduw_p(vdev, &cfg.indirection_table_mask);
    if (!do_rss) {
        n->rss_data.indirections_len = 0;
    }
    if (n->rss_data.indirections_len >= VIRTIO_NET_RSS_MAX_TABLE_LEN) {
        err_msg = "Too large indirection table";
        err_value = n->rss_data.indirections_len;
        goto error;
    }
    n->rss_data.indirections_len++;
    if (!is_power_of_2(n->rss_data.indirections_len)) {
        err_msg = "Invalid size of indirection table";
        err_value = n->rss_data.indirections_len;
        goto error;
    }
    n->rss_data.default_queue = do_rss ?
        virtio_lduw_p(vdev, &cfg.unclassified_queue) : 0;
    if (n->rss_data.default_queue >= n->max_queue_pairs) {
        err_msg = "Invalid default queue";
        err_value = n->rss_data.default_queue;
        goto error;
    }
    offset += size_get;
    size_get = sizeof(uint16_t) * n->rss_data.indirections_len;
    g_free(n->rss_data.indirections_table);
    n->rss_data.indirections_table = g_malloc(size_get);
    if (!n->rss_data.indirections_table) {
        err_msg = "Can't allocate indirections table";
        err_value = n->rss_data.indirections_len;
        goto error;
    }
    s = iov_to_buf(iov, iov_cnt, offset,
                   n->rss_data.indirections_table, size_get);
    if (s != size_get) {
        err_msg = "Short indirection table buffer";
        err_value = (uint32_t)s;
        goto error;
    }
    for (i = 0; i < n->rss_data.indirections_len; ++i) {
        uint16_t val = n->rss_data.indirections_table[i];
        n->rss_data.indirections_table[i] = virtio_lduw_p(vdev, &val);
    }
    offset += size_get;
    size_get = sizeof(temp);
    s = iov_to_buf(iov, iov_cnt, offset, &temp, size_get);
    if (s != size_get) {
        err_msg = "Can't get queue_pairs";
        err_value = (uint32_t)s;
        goto error;
    }
    queue_pairs = do_rss ? virtio_lduw_p(vdev, &temp.us) : n->curr_queue_pairs;
    if (queue_pairs == 0 || queue_pairs > n->max_queue_pairs) {
        err_msg = "Invalid number of queue_pairs";
        err_value = queue_pairs;
        goto error;
    }
    if (temp.b > VIRTIO_NET_RSS_MAX_KEY_SIZE) {
        err_msg = "Invalid key size";
        err_value = temp.b;
        goto error;
    }
    if (!temp.b && n->rss_data.hash_types) {
        err_msg = "No key provided";
        err_value = 0;
        goto error;
    }
    if (!temp.b && !n->rss_data.hash_types) {
        virtio_net_disable_rss(n);
        return queue_pairs;
    }
    offset += size_get;
    size_get = temp.b;
    s = iov_to_buf(iov, iov_cnt, offset, n->rss_data.key, size_get);
    if (s != size_get) {
        err_msg = "Can't get key buffer";
        err_value = (uint32_t)s;
        goto error;
    }
    n->rss_data.enabled = true;
    virtio_net_commit_rss_config(n);
    return queue_pairs;
error:
    trace_virtio_net_rss_error(n, err_msg, err_value);
    virtio_net_disable_rss(n);
    return 0;
}

static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd,
                                struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint16_t queue_pairs;
    NetClientState *nc = qemu_get_queue(n->nic);

    virtio_net_disable_rss(n);
    if (cmd == VIRTIO_NET_CTRL_MQ_HASH_CONFIG) {
        queue_pairs = virtio_net_handle_rss(n, iov, iov_cnt, false);
        return queue_pairs ? VIRTIO_NET_OK : VIRTIO_NET_ERR;
    }
    if (cmd == VIRTIO_NET_CTRL_MQ_RSS_CONFIG) {
        queue_pairs = virtio_net_handle_rss(n, iov, iov_cnt, true);
    } else if (cmd == VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET) {
        struct virtio_net_ctrl_mq mq;
        size_t s;
        if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_MQ)) {
            return VIRTIO_NET_ERR;
        }
        s = iov_to_buf(iov, iov_cnt, 0, &mq, sizeof(mq));
        if (s != sizeof(mq)) {
            return VIRTIO_NET_ERR;
        }
        queue_pairs = virtio_lduw_p(vdev, &mq.virtqueue_pairs);

    } else {
        return VIRTIO_NET_ERR;
    }

    if (queue_pairs < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
        queue_pairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX ||
        queue_pairs > n->max_queue_pairs ||
        !n->multiqueue) {
        return VIRTIO_NET_ERR;
    }

    n->curr_queue_pairs = queue_pairs;
    if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
        /*
         * Avoid updating the backend for a vdpa device: We're only interested
         * in updating the device model queues.
         */
        return VIRTIO_NET_OK;
    }
    /* stop the backend before changing the number of queue_pairs to avoid
     * handling a disabled queue */
    virtio_net_set_status(vdev, vdev->status);
    virtio_net_set_queue_pairs(n);

    return VIRTIO_NET_OK;
}

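/*
 * out_sg is copied (g_memdup2) because iov_discard_front() modifies the
 * iovec as the command header and payload are consumed.
 */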
size_t virtio_net_handle_ctrl_iov(VirtIODevice *vdev,
                                  const struct iovec *in_sg, unsigned in_num,
                                  const struct iovec *out_sg,
                                  unsigned out_num)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    struct virtio_net_ctrl_hdr ctrl;
    virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
    size_t s;
    struct iovec *iov, *iov2;

    if (iov_size(in_sg, in_num) < sizeof(status) ||
        iov_size(out_sg, out_num) < sizeof(ctrl)) {
        virtio_error(vdev, "virtio-net ctrl missing headers");
        return 0;
    }

    iov2 = iov = g_memdup2(out_sg, sizeof(struct iovec) * out_num);
    s = iov_to_buf(iov, out_num, 0, &ctrl, sizeof(ctrl));
    iov_discard_front(&iov, &out_num, sizeof(ctrl));
    if (s != sizeof(ctrl)) {
        status = VIRTIO_NET_ERR;
    } else if (ctrl.class == VIRTIO_NET_CTRL_RX) {
        status = virtio_net_handle_rx_mode(n, ctrl.cmd, iov, out_num);
    } else if (ctrl.class == VIRTIO_NET_CTRL_MAC) {
        status = virtio_net_handle_mac(n, ctrl.cmd, iov, out_num);
    } else if (ctrl.class == VIRTIO_NET_CTRL_VLAN) {
        status = virtio_net_handle_vlan_table(n, ctrl.cmd, iov, out_num);
    } else if (ctrl.class == VIRTIO_NET_CTRL_ANNOUNCE) {
        status = virtio_net_handle_announce(n, ctrl.cmd, iov, out_num);
    } else if (ctrl.class == VIRTIO_NET_CTRL_MQ) {
        status = virtio_net_handle_mq(n, ctrl.cmd, iov, out_num);
    } else if (ctrl.class == VIRTIO_NET_CTRL_GUEST_OFFLOADS) {
        status = virtio_net_handle_offloads(n, ctrl.cmd, iov, out_num);
    }

    s = iov_from_buf(in_sg, in_num, 0, &status, sizeof(status));
    assert(s == sizeof(status));

    g_free(iov2);
    return sizeof(status);
}

static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtQueueElement *elem;

    for (;;) {
        size_t written;
        elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
        if (!elem) {
            break;
        }

        written = virtio_net_handle_ctrl_iov(vdev, elem->in_sg, elem->in_num,
                                             elem->out_sg, elem->out_num);
        if (written > 0) {
            virtqueue_push(vq, elem, written);
            virtio_notify(vdev, vq);
            g_free(elem);
        } else {
            virtqueue_detach_element(vq, elem, 0);
            g_free(elem);
            break;
        }
    }
}

/* RX */

static void virtio_net_handle_rx(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    int queue_index = vq2q(virtio_get_queue_index(vq));

    qemu_flush_queued_packets(qemu_get_subqueue(n->nic, queue_index));
}

static bool virtio_net_can_receive(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    VirtIONetQueue *q = virtio_net_get_subqueue(nc);

    if (!vdev->vm_running) {
        return false;
    }

    if (nc->queue_index >= n->curr_queue_pairs) {
        return false;
    }

    if (!virtio_queue_ready(q->rx_vq) ||
        !(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
        return false;
    }

    return true;
}

1656 | static int virtio_net_has_buffers(VirtIONetQueue *q, int bufsize) | |
1657 | { | |
1658 | int opaque; | |
1659 | unsigned int in_bytes; | |
1660 | VirtIONet *n = q->n; | |
1661 | ||
1662 | while (virtio_queue_empty(q->rx_vq) || n->mergeable_rx_bufs) { | |
1663 | opaque = virtqueue_get_avail_bytes(q->rx_vq, &in_bytes, NULL, | |
1664 | bufsize, 0); | |
1665 | /* Buffers are sufficient, disable notification */ | |
1666 | if (bufsize <= in_bytes) { | |
1667 | break; | |
1668 | } | |
1669 | ||
1670 | if (virtio_queue_enable_notification_and_check(q->rx_vq, opaque)) { | |
1671 | /* Guest has added some buffers, try again */ | |
1672 | continue; | |
1673 | } else { | |
1674 | return 0; | |
1675 | } | |
1676 | } | |
1677 | ||
1678 | virtio_queue_set_notification(q->rx_vq, 0); | |
1679 | ||
1680 | return 1; | |
1681 | } | |
1682 | ||
1683 | static void virtio_net_hdr_swap(VirtIODevice *vdev, struct virtio_net_hdr *hdr) | |
1684 | { | |
1685 | virtio_tswap16s(vdev, &hdr->hdr_len); | |
1686 | virtio_tswap16s(vdev, &hdr->gso_size); | |
1687 | virtio_tswap16s(vdev, &hdr->csum_start); | |
1688 | virtio_tswap16s(vdev, &hdr->csum_offset); | |
1689 | } | |
1690 | ||
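/*
 * Sketch of what each virtio_tswap16s() above amounts to when a swap is
 * actually performed (a legacy device with a cross-endian guest); with a
 * modern VIRTIO_F_VERSION_1 device the header fields are always
 * little-endian, so on a little-endian host this is a no-op.
 * example_bswap16() is illustrative only, not QEMU API.
 */
static uint16_t example_bswap16(uint16_t v)
{
    return (uint16_t)((v >> 8) | (v << 8));
}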
1691 | /* dhclient uses AF_PACKET but doesn't pass auxdata to the kernel so | |
1692 | * it never finds out that the packets don't have valid checksums. This | |
1693 | * causes dhclient to get upset. Fedora's carried a patch for ages to | |
1694 | * fix this with Xen but it hasn't appeared in an upstream release of | |
1695 | * dhclient yet. | |
1696 | * | |
1697 | * To avoid breaking existing guests, we catch udp packets and add | |
1698 | * checksums. This is terrible but it's better than hacking the guest | |
1699 | * kernels. | |
1700 | * | |
1701 | * N.B. if we introduce a zero-copy API, this operation is no longer free so | |
1702 | * we should provide a mechanism to disable it to avoid polluting the host | |
1703 | * cache. | |
1704 | */ | |
1705 | static void work_around_broken_dhclient(struct virtio_net_hdr *hdr, | |
1706 | uint8_t *buf, size_t size) | |
1707 | { | |
1708 | size_t csum_size = ETH_HLEN + sizeof(struct ip_header) + | |
1709 | sizeof(struct udp_header); | |
1710 | ||
1711 | if ((hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && /* missing csum */ | |
1712 | (size >= csum_size && size < 1500) && /* normal sized MTU */ | |
1713 | (buf[12] == 0x08 && buf[13] == 0x00) && /* ethertype == IPv4 */ | |
1714 | (buf[23] == 17) && /* ip.protocol == UDP */ | |
1715 | (buf[34] == 0 && buf[35] == 67)) { /* udp.srcport == bootps */ | |
1716 | net_checksum_calculate(buf, size, CSUM_UDP); | |
1717 | hdr->flags &= ~VIRTIO_NET_HDR_F_NEEDS_CSUM; | |
1718 | } | |
1719 | } | |
1720 | ||
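/*
 * An equivalent formulation of the raw-offset checks above using the
 * header structs from net/eth.h; a sketch only (example_is_dhcp_reply
 * is hypothetical).  Note that "ip + 1" encodes the same assumption as
 * buf[34]: an IPv4 header with no options and no VLAN tag.
 */
static bool example_is_dhcp_reply(const uint8_t *buf, size_t size)
{
    const struct eth_header *eth = (const struct eth_header *)buf;
    const struct ip_header *ip = (const struct ip_header *)(eth + 1);
    const struct udp_header *udp = (const struct udp_header *)(ip + 1);

    return size >= ETH_HLEN + sizeof(*ip) + sizeof(*udp) &&
           eth->h_proto == htons(ETH_P_IP) &&
           ip->ip_p == IPPROTO_UDP &&
           udp->uh_sport == htons(67); /* bootps */
}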
1721 | static void receive_header(VirtIONet *n, const struct iovec *iov, int iov_cnt, | |
1722 | const void *buf, size_t size) | |
1723 | { | |
1724 | if (n->has_vnet_hdr) { | |
1725 | /* FIXME this cast is evil */ | |
1726 | void *wbuf = (void *)buf; | |
1727 | work_around_broken_dhclient(wbuf, wbuf + n->host_hdr_len, | |
1728 | size - n->host_hdr_len); | |
1729 | ||
1730 | if (n->needs_vnet_hdr_swap) { | |
1731 | virtio_net_hdr_swap(VIRTIO_DEVICE(n), wbuf); | |
1732 | } | |
1733 | iov_from_buf(iov, iov_cnt, 0, buf, sizeof(struct virtio_net_hdr)); | |
1734 | } else { | |
1735 | struct virtio_net_hdr hdr = { | |
1736 | .flags = 0, | |
1737 | .gso_type = VIRTIO_NET_HDR_GSO_NONE | |
1738 | }; | |
1739 | iov_from_buf(iov, iov_cnt, 0, &hdr, sizeof hdr); | |
1740 | } | |
1741 | } | |
1742 | ||
1743 | static int receive_filter(VirtIONet *n, const uint8_t *buf, int size) | |
1744 | { | |
1745 | static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; | |
1746 | static const uint8_t vlan[] = {0x81, 0x00}; | |
1747 | uint8_t *ptr = (uint8_t *)buf; | |
1748 | int i; | |
1749 | ||
1750 | if (n->promisc) | |
1751 | return 1; | |
1752 | ||
1753 | ptr += n->host_hdr_len; | |
1754 | ||
1755 | if (!memcmp(&ptr[12], vlan, sizeof(vlan))) { | |
1756 | int vid = lduw_be_p(ptr + 14) & 0xfff; | |
1757 | if (!(n->vlans[vid >> 5] & (1U << (vid & 0x1f)))) | |
1758 | return 0; | |
1759 | } | |
1760 | ||
1761 | if (ptr[0] & 1) { /* multicast */ | |
1762 | if (!memcmp(ptr, bcast, sizeof(bcast))) { | |
1763 | return !n->nobcast; | |
1764 | } else if (n->nomulti) { | |
1765 | return 0; | |
1766 | } else if (n->allmulti || n->mac_table.multi_overflow) { | |
1767 | return 1; | |
1768 | } | |
1769 | ||
1770 | for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) { | |
1771 | if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) { | |
1772 | return 1; | |
1773 | } | |
1774 | } | |
1775 | } else { /* unicast */ | |
1776 | if (n->nouni) { | |
1777 | return 0; | |
1778 | } else if (n->alluni || n->mac_table.uni_overflow) { | |
1779 | return 1; | |
1780 | } else if (!memcmp(ptr, n->mac, ETH_ALEN)) { | |
1781 | return 1; | |
1782 | } | |
1783 | ||
1784 | for (i = 0; i < n->mac_table.first_multi; i++) { | |
1785 | if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) { | |
1786 | return 1; | |
1787 | } | |
1788 | } | |
1789 | } | |
1790 | ||
1791 | return 0; | |
1792 | } | |
1793 | ||
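/*
 * The VLAN check above indexes a 4096-bit bitmap stored as 32-bit
 * words: word = vid >> 5, bit = vid & 0x1f.  Equivalent sketch
 * (example_vlan_allowed is hypothetical):
 */
static bool example_vlan_allowed(const uint32_t *vlans, uint16_t vid)
{
    return vlans[vid >> 5] & (1U << (vid & 0x1f));
}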
1794 | static uint8_t virtio_net_get_hash_type(bool hasip4, | |
1795 | bool hasip6, | |
1796 | EthL4HdrProto l4hdr_proto, | |
1797 | uint32_t types) | |
1798 | { | |
1799 | if (hasip4) { | |
1800 | switch (l4hdr_proto) { | |
1801 | case ETH_L4_HDR_PROTO_TCP: | |
1802 | if (types & VIRTIO_NET_RSS_HASH_TYPE_TCPv4) { | |
1803 | return NetPktRssIpV4Tcp; | |
1804 | } | |
1805 | break; | |
1806 | ||
1807 | case ETH_L4_HDR_PROTO_UDP: | |
1808 | if (types & VIRTIO_NET_RSS_HASH_TYPE_UDPv4) { | |
1809 | return NetPktRssIpV4Udp; | |
1810 | } | |
1811 | break; | |
1812 | ||
1813 | default: | |
1814 | break; | |
1815 | } | |
1816 | ||
1817 | if (types & VIRTIO_NET_RSS_HASH_TYPE_IPv4) { | |
1818 | return NetPktRssIpV4; | |
1819 | } | |
1820 | } else if (hasip6) { | |
1821 | switch (l4hdr_proto) { | |
1822 | case ETH_L4_HDR_PROTO_TCP: | |
1823 | if (types & VIRTIO_NET_RSS_HASH_TYPE_TCP_EX) { | |
1824 | return NetPktRssIpV6TcpEx; | |
1825 | } | |
1826 | if (types & VIRTIO_NET_RSS_HASH_TYPE_TCPv6) { | |
1827 | return NetPktRssIpV6Tcp; | |
1828 | } | |
1829 | break; | |
1830 | ||
1831 | case ETH_L4_HDR_PROTO_UDP: | |
1832 | if (types & VIRTIO_NET_RSS_HASH_TYPE_UDP_EX) { | |
1833 | return NetPktRssIpV6UdpEx; | |
1834 | } | |
1835 | if (types & VIRTIO_NET_RSS_HASH_TYPE_UDPv6) { | |
1836 | return NetPktRssIpV6Udp; | |
1837 | } | |
1838 | break; | |
1839 | ||
1840 | default: | |
1841 | break; | |
1842 | } | |
1843 | ||
1844 | if (types & VIRTIO_NET_RSS_HASH_TYPE_IP_EX) { | |
1845 | return NetPktRssIpV6Ex; | |
1846 | } | |
1847 | if (types & VIRTIO_NET_RSS_HASH_TYPE_IPv6) { | |
1848 | return NetPktRssIpV6; | |
1849 | } | |
1850 | } | |
1851 | return 0xff; /* no suitable hash type */ | |
1852 | } | |
1853 | ||
1854 | static int virtio_net_process_rss(NetClientState *nc, const uint8_t *buf, | |
1855 | size_t size, | |
1856 | struct virtio_net_hdr_v1_hash *hdr) | |
1857 | { | |
1858 | VirtIONet *n = qemu_get_nic_opaque(nc); | |
1859 | unsigned int index = nc->queue_index, new_index = index; | |
1860 | struct NetRxPkt *pkt = n->rx_pkt; | |
1861 | uint8_t net_hash_type; | |
1862 | uint32_t hash; | |
1863 | bool hasip4, hasip6; | |
1864 | EthL4HdrProto l4hdr_proto; | |
1865 | static const uint8_t reports[NetPktRssIpV6UdpEx + 1] = { | |
1866 | VIRTIO_NET_HASH_REPORT_IPv4, | |
1867 | VIRTIO_NET_HASH_REPORT_TCPv4, | |
1868 | VIRTIO_NET_HASH_REPORT_TCPv6, | |
1869 | VIRTIO_NET_HASH_REPORT_IPv6, | |
1870 | VIRTIO_NET_HASH_REPORT_IPv6_EX, | |
1871 | VIRTIO_NET_HASH_REPORT_TCPv6_EX, | |
1872 | VIRTIO_NET_HASH_REPORT_UDPv4, | |
1873 | VIRTIO_NET_HASH_REPORT_UDPv6, | |
1874 | VIRTIO_NET_HASH_REPORT_UDPv6_EX | |
1875 | }; | |
1876 | struct iovec iov = { | |
1877 | .iov_base = (void *)buf, | |
1878 | .iov_len = size | |
1879 | }; | |
1880 | ||
1881 | net_rx_pkt_set_protocols(pkt, &iov, 1, n->host_hdr_len); | |
1882 | net_rx_pkt_get_protocols(pkt, &hasip4, &hasip6, &l4hdr_proto); | |
1883 | net_hash_type = virtio_net_get_hash_type(hasip4, hasip6, l4hdr_proto, | |
1884 | n->rss_data.hash_types); | |
1885 | if (net_hash_type > NetPktRssIpV6UdpEx) { | |
1886 | if (n->rss_data.populate_hash) { | |
1887 | hdr->hash_value = VIRTIO_NET_HASH_REPORT_NONE; | |
1888 | hdr->hash_report = 0; | |
1889 | } | |
1890 | return n->rss_data.redirect ? n->rss_data.default_queue : -1; | |
1891 | } | |
1892 | ||
1893 | hash = net_rx_pkt_calc_rss_hash(pkt, net_hash_type, n->rss_data.key); | |
1894 | ||
1895 | if (n->rss_data.populate_hash) { | |
1896 | hdr->hash_value = hash; | |
1897 | hdr->hash_report = reports[net_hash_type]; | |
1898 | } | |
1899 | ||
1900 | if (n->rss_data.redirect) { | |
1901 | new_index = hash & (n->rss_data.indirections_len - 1); | |
1902 | new_index = n->rss_data.indirections_table[new_index]; | |
1903 | } | |
1904 | ||
1905 | return (index == new_index) ? -1 : new_index; | |
1906 | } | |
1907 | ||
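/*
 * Sketch of the redirection step above: indirections_len is a power of
 * two, so "hash & (len - 1)" is "hash % len", and the table entry names
 * the destination queue.  example_rss_queue() is illustrative only.
 */
static uint16_t example_rss_queue(uint32_t hash,
                                  const uint16_t *indirections_table,
                                  unsigned indirections_len)
{
    return indirections_table[hash & (indirections_len - 1)];
}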
1908 | static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf, | |
1909 | size_t size) | |
1910 | { | |
1911 | VirtIONet *n = qemu_get_nic_opaque(nc); | |
1912 | VirtIONetQueue *q; | |
1913 | VirtIODevice *vdev = VIRTIO_DEVICE(n); | |
1914 | QEMU_UNINITIALIZED VirtQueueElement *elems[VIRTQUEUE_MAX_SIZE]; | |
1915 | QEMU_UNINITIALIZED size_t lens[VIRTQUEUE_MAX_SIZE]; | |
1916 | QEMU_UNINITIALIZED struct iovec mhdr_sg[VIRTQUEUE_MAX_SIZE]; | |
1917 | struct virtio_net_hdr_v1_hash extra_hdr; | |
1918 | unsigned mhdr_cnt = 0; | |
1919 | size_t offset, i, guest_offset, j; | |
1920 | ssize_t err; | |
1921 | ||
1922 | memset(&extra_hdr, 0, sizeof(extra_hdr)); | |
1923 | ||
1924 | if (n->rss_data.enabled && n->rss_data.enabled_software_rss) { | |
1925 | int index = virtio_net_process_rss(nc, buf, size, &extra_hdr); | |
1926 | if (index >= 0) { | |
1927 | nc = qemu_get_subqueue(n->nic, index % n->curr_queue_pairs); | |
1928 | } | |
1929 | } | |
1930 | ||
1931 | if (!virtio_net_can_receive(nc)) { | |
1932 | return -1; | |
1933 | } | |
1934 | ||
1935 | q = virtio_net_get_subqueue(nc); | |
1936 | ||
1937 | /* hdr_len refers to the header we supply to the guest */ | |
1938 | if (!virtio_net_has_buffers(q, size + n->guest_hdr_len - n->host_hdr_len)) { | |
1939 | return 0; | |
1940 | } | |
1941 | ||
1942 | if (!receive_filter(n, buf, size)) | |
1943 | return size; | |
1944 | ||
1945 | offset = i = 0; | |
1946 | ||
1947 | while (offset < size) { | |
1948 | VirtQueueElement *elem; | |
1949 | int len, total; | |
1950 | const struct iovec *sg; | |
1951 | ||
1952 | total = 0; | |
1953 | ||
1954 | if (i == VIRTQUEUE_MAX_SIZE) { | |
1955 | virtio_error(vdev, "virtio-net unexpected long buffer chain"); | |
1956 | err = size; | |
1957 | goto err; | |
1958 | } | |
1959 | ||
1960 | elem = virtqueue_pop(q->rx_vq, sizeof(VirtQueueElement)); | |
1961 | if (!elem) { | |
1962 | if (i) { | |
1963 | virtio_error(vdev, "virtio-net unexpected empty queue: " | |
1964 | "i %zd mergeable %d offset %zd, size %zd, " | |
1965 | "guest hdr len %zd, host hdr len %zd " | |
1966 | "guest features 0x%" PRIx64, | |
1967 | i, n->mergeable_rx_bufs, offset, size, | |
1968 | n->guest_hdr_len, n->host_hdr_len, | |
1969 | vdev->guest_features); | |
1970 | } | |
1971 | err = -1; | |
1972 | goto err; | |
1973 | } | |
1974 | ||
1975 | if (elem->in_num < 1) { | |
1976 | virtio_error(vdev, | |
1977 | "virtio-net receive queue contains no in buffers"); | |
1978 | virtqueue_detach_element(q->rx_vq, elem, 0); | |
1979 | g_free(elem); | |
1980 | err = -1; | |
1981 | goto err; | |
1982 | } | |
1983 | ||
1984 | sg = elem->in_sg; | |
1985 | if (i == 0) { | |
1986 | assert(offset == 0); | |
1987 | if (n->mergeable_rx_bufs) { | |
1988 | mhdr_cnt = iov_copy(mhdr_sg, ARRAY_SIZE(mhdr_sg), | |
1989 | sg, elem->in_num, | |
1990 | offsetof(typeof(extra_hdr), hdr.num_buffers), | |
1991 | sizeof(extra_hdr.hdr.num_buffers)); | |
1992 | } else { | |
1993 | extra_hdr.hdr.num_buffers = cpu_to_le16(1); | |
1994 | } | |
1995 | ||
1996 | receive_header(n, sg, elem->in_num, buf, size); | |
1997 | if (n->rss_data.populate_hash) { | |
1998 | offset = offsetof(typeof(extra_hdr), hash_value); | |
1999 | iov_from_buf(sg, elem->in_num, offset, | |
2000 | (char *)&extra_hdr + offset, | |
2001 | sizeof(extra_hdr.hash_value) + | |
2002 | sizeof(extra_hdr.hash_report)); | |
2003 | } | |
2004 | offset = n->host_hdr_len; | |
2005 | total += n->guest_hdr_len; | |
2006 | guest_offset = n->guest_hdr_len; | |
2007 | } else { | |
2008 | guest_offset = 0; | |
2009 | } | |
2010 | ||
2011 | /* copy in packet. ugh */ | |
2012 | len = iov_from_buf(sg, elem->in_num, guest_offset, | |
2013 | buf + offset, size - offset); | |
2014 | total += len; | |
2015 | offset += len; | |
2016 | /* If buffers can't be merged, at this point we | |
2017 | * must have consumed the complete packet. | |
2018 | * Otherwise, drop it. */ | |
2019 | if (!n->mergeable_rx_bufs && offset < size) { | |
2020 | virtqueue_unpop(q->rx_vq, elem, total); | |
2021 | g_free(elem); | |
2022 | err = size; | |
2023 | goto err; | |
2024 | } | |
2025 | ||
2026 | elems[i] = elem; | |
2027 | lens[i] = total; | |
2028 | i++; | |
2029 | } | |
2030 | ||
2031 | if (mhdr_cnt) { | |
2032 | virtio_stw_p(vdev, &extra_hdr.hdr.num_buffers, i); | |
2033 | iov_from_buf(mhdr_sg, mhdr_cnt, | |
2034 | 0, | |
2035 | &extra_hdr.hdr.num_buffers, | |
2036 | sizeof extra_hdr.hdr.num_buffers); | |
2037 | } | |
2038 | ||
2039 | for (j = 0; j < i; j++) { | |
2040 | /* signal other side */ | |
2041 | virtqueue_fill(q->rx_vq, elems[j], lens[j], j); | |
2042 | g_free(elems[j]); | |
2043 | } | |
2044 | ||
2045 | virtqueue_flush(q->rx_vq, i); | |
2046 | virtio_notify(vdev, q->rx_vq); | |
2047 | ||
2048 | return size; | |
2049 | ||
2050 | err: | |
2051 | for (j = 0; j < i; j++) { | |
2052 | virtqueue_detach_element(q->rx_vq, elems[j], lens[j]); | |
2053 | g_free(elems[j]); | |
2054 | } | |
2055 | ||
2056 | return err; | |
2057 | } | |
2058 | ||
2059 | static ssize_t virtio_net_do_receive(NetClientState *nc, const uint8_t *buf, | |
2060 | size_t size) | |
2061 | { | |
2062 | RCU_READ_LOCK_GUARD(); | |
2063 | ||
2064 | return virtio_net_receive_rcu(nc, buf, size); | |
2065 | } | |
2066 | ||
2067 | /* | |
2068 | * Accessors to read and write the IP packet data length field. This | |
2069 | * is a potentially unaligned network-byte-order 16 bit unsigned integer | |
2070 | * pointed to by unit->ip_plen. | |
2071 | */ | |
2072 | static uint16_t read_unit_ip_len(VirtioNetRscUnit *unit) | |
2073 | { | |
2074 | return lduw_be_p(unit->ip_plen); | |
2075 | } | |
2076 | ||
2077 | static void write_unit_ip_len(VirtioNetRscUnit *unit, uint16_t l) | |
2078 | { | |
2079 | stw_be_p(unit->ip_plen, l); | |
2080 | } | |
2081 | ||
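/*
 * Byte-wise equivalent of the accessors above; this is effectively what
 * lduw_be_p()/stw_be_p() do for a potentially unaligned pointer.  The
 * example_* helpers are illustrative, not QEMU API.
 */
static uint16_t example_load_be16(const uint8_t *p)
{
    return (uint16_t)((p[0] << 8) | p[1]);
}

static void example_store_be16(uint8_t *p, uint16_t v)
{
    p[0] = v >> 8;
    p[1] = v & 0xff;
}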
2082 | static void virtio_net_rsc_extract_unit4(VirtioNetRscChain *chain, | |
2083 | const uint8_t *buf, | |
2084 | VirtioNetRscUnit *unit) | |
2085 | { | |
2086 | uint16_t ip_hdrlen; | |
2087 | struct ip_header *ip; | |
2088 | ||
2089 | ip = (struct ip_header *)(buf + chain->n->guest_hdr_len | |
2090 | + sizeof(struct eth_header)); | |
2091 | unit->ip = (void *)ip; | |
2092 | ip_hdrlen = (ip->ip_ver_len & 0xF) << 2; /* IHL is in 32-bit words */ | |
2093 | unit->ip_plen = &ip->ip_len; | |
2094 | unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip) + ip_hdrlen); | |
2095 | unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10; /* doff words -> bytes */ | |
2096 | unit->payload = read_unit_ip_len(unit) - ip_hdrlen - unit->tcp_hdrlen; | |
2097 | } | |
2098 | ||
2099 | static void virtio_net_rsc_extract_unit6(VirtioNetRscChain *chain, | |
2100 | const uint8_t *buf, | |
2101 | VirtioNetRscUnit *unit) | |
2102 | { | |
2103 | struct ip6_header *ip6; | |
2104 | ||
2105 | ip6 = (struct ip6_header *)(buf + chain->n->guest_hdr_len | |
2106 | + sizeof(struct eth_header)); | |
2107 | unit->ip = ip6; | |
2108 | unit->ip_plen = &(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen); | |
2109 | unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip) | |
2110 | + sizeof(struct ip6_header)); | |
2111 | unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10; /* doff words -> bytes */ | |
2112 | ||
2113 | /* The payload length differs between ipv4 and ipv6: in ipv6 the | |
2114 | IP header itself is excluded from the count */ | |
2115 | unit->payload = read_unit_ip_len(unit) - unit->tcp_hdrlen; | |
2116 | } | |
2117 | ||
2118 | static size_t virtio_net_rsc_drain_seg(VirtioNetRscChain *chain, | |
2119 | VirtioNetRscSeg *seg) | |
2120 | { | |
2121 | int ret; | |
2122 | struct virtio_net_hdr_v1 *h; | |
2123 | ||
2124 | h = (struct virtio_net_hdr_v1 *)seg->buf; | |
2125 | h->flags = 0; | |
2126 | h->gso_type = VIRTIO_NET_HDR_GSO_NONE; | |
2127 | ||
2128 | if (seg->is_coalesced) { | |
2129 | h->rsc.segments = seg->packets; | |
2130 | h->rsc.dup_acks = seg->dup_ack; | |
2131 | h->flags = VIRTIO_NET_HDR_F_RSC_INFO; | |
2132 | if (chain->proto == ETH_P_IP) { | |
2133 | h->gso_type = VIRTIO_NET_HDR_GSO_TCPV4; | |
2134 | } else { | |
2135 | h->gso_type = VIRTIO_NET_HDR_GSO_TCPV6; | |
2136 | } | |
2137 | } | |
2138 | ||
2139 | ret = virtio_net_do_receive(seg->nc, seg->buf, seg->size); | |
2140 | QTAILQ_REMOVE(&chain->buffers, seg, next); | |
2141 | g_free(seg->buf); | |
2142 | g_free(seg); | |
2143 | ||
2144 | return ret; | |
2145 | } | |
2146 | ||
2147 | static void virtio_net_rsc_purge(void *opq) | |
2148 | { | |
2149 | VirtioNetRscSeg *seg, *rn; | |
2150 | VirtioNetRscChain *chain = (VirtioNetRscChain *)opq; | |
2151 | ||
2152 | QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn) { | |
2153 | if (virtio_net_rsc_drain_seg(chain, seg) == 0) { | |
2154 | chain->stat.purge_failed++; | |
2155 | continue; | |
2156 | } | |
2157 | } | |
2158 | ||
2159 | chain->stat.timer++; | |
2160 | if (!QTAILQ_EMPTY(&chain->buffers)) { | |
2161 | timer_mod(chain->drain_timer, | |
2162 | qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + chain->n->rsc_timeout); | |
2163 | } | |
2164 | } | |
2165 | ||
2166 | static void virtio_net_rsc_cleanup(VirtIONet *n) | |
2167 | { | |
2168 | VirtioNetRscChain *chain, *rn_chain; | |
2169 | VirtioNetRscSeg *seg, *rn_seg; | |
2170 | ||
2171 | QTAILQ_FOREACH_SAFE(chain, &n->rsc_chains, next, rn_chain) { | |
2172 | QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn_seg) { | |
2173 | QTAILQ_REMOVE(&chain->buffers, seg, next); | |
2174 | g_free(seg->buf); | |
2175 | g_free(seg); | |
2176 | } | |
2177 | ||
2178 | timer_free(chain->drain_timer); | |
2179 | QTAILQ_REMOVE(&n->rsc_chains, chain, next); | |
2180 | g_free(chain); | |
2181 | } | |
2182 | } | |
2183 | ||
2184 | static void virtio_net_rsc_cache_buf(VirtioNetRscChain *chain, | |
2185 | NetClientState *nc, | |
2186 | const uint8_t *buf, size_t size) | |
2187 | { | |
2188 | uint16_t hdr_len; | |
2189 | VirtioNetRscSeg *seg; | |
2190 | ||
2191 | hdr_len = chain->n->guest_hdr_len; | |
2192 | seg = g_new(VirtioNetRscSeg, 1); | |
2193 | seg->buf = g_malloc(hdr_len + sizeof(struct eth_header) | |
2194 | + sizeof(struct ip6_header) + VIRTIO_NET_MAX_TCP_PAYLOAD); | |
2195 | memcpy(seg->buf, buf, size); | |
2196 | seg->size = size; | |
2197 | seg->packets = 1; | |
2198 | seg->dup_ack = 0; | |
2199 | seg->is_coalesced = 0; | |
2200 | seg->nc = nc; | |
2201 | ||
2202 | QTAILQ_INSERT_TAIL(&chain->buffers, seg, next); | |
2203 | chain->stat.cache++; | |
2204 | ||
2205 | switch (chain->proto) { | |
2206 | case ETH_P_IP: | |
2207 | virtio_net_rsc_extract_unit4(chain, seg->buf, &seg->unit); | |
2208 | break; | |
2209 | case ETH_P_IPV6: | |
2210 | virtio_net_rsc_extract_unit6(chain, seg->buf, &seg->unit); | |
2211 | break; | |
2212 | default: | |
2213 | g_assert_not_reached(); | |
2214 | } | |
2215 | } | |
2216 | ||
2217 | static int32_t virtio_net_rsc_handle_ack(VirtioNetRscChain *chain, | |
2218 | VirtioNetRscSeg *seg, | |
2219 | const uint8_t *buf, | |
2220 | struct tcp_header *n_tcp, | |
2221 | struct tcp_header *o_tcp) | |
2222 | { | |
2223 | uint32_t nack, oack; | |
2224 | uint16_t nwin, owin; | |
2225 | ||
2226 | nack = htonl(n_tcp->th_ack); | |
2227 | nwin = htons(n_tcp->th_win); | |
2228 | oack = htonl(o_tcp->th_ack); | |
2229 | owin = htons(o_tcp->th_win); | |
2230 | ||
2231 | if ((nack - oack) >= VIRTIO_NET_MAX_TCP_PAYLOAD) { | |
2232 | chain->stat.ack_out_of_win++; | |
2233 | return RSC_FINAL; | |
2234 | } else if (nack == oack) { | |
2235 | /* duplicated ack or window probe */ | |
2236 | if (nwin == owin) { | |
2237 | /* duplicate ack; count it, the WHQL test caps dup acks at 1 */ | |
2238 | chain->stat.dup_ack++; | |
2239 | return RSC_FINAL; | |
2240 | } else { | |
2241 | /* Coalesce window update */ | |
2242 | o_tcp->th_win = n_tcp->th_win; | |
2243 | chain->stat.win_update++; | |
2244 | return RSC_COALESCE; | |
2245 | } | |
2246 | } else { | |
2247 | /* pure ack, go to state 'C': finalize */ | |
2248 | chain->stat.pure_ack++; | |
2249 | return RSC_FINAL; | |
2250 | } | |
2251 | } | |
2252 | ||
2253 | static int32_t virtio_net_rsc_coalesce_data(VirtioNetRscChain *chain, | |
2254 | VirtioNetRscSeg *seg, | |
2255 | const uint8_t *buf, | |
2256 | VirtioNetRscUnit *n_unit) | |
2257 | { | |
2258 | void *data; | |
2259 | uint16_t o_ip_len; | |
2260 | uint32_t nseq, oseq; | |
2261 | VirtioNetRscUnit *o_unit; | |
2262 | ||
2263 | o_unit = &seg->unit; | |
2264 | o_ip_len = read_unit_ip_len(o_unit); | |
2265 | nseq = htonl(n_unit->tcp->th_seq); | |
2266 | oseq = htonl(o_unit->tcp->th_seq); | |
2267 | ||
2268 | /* out of order or retransmitted. */ | |
2269 | if ((nseq - oseq) > VIRTIO_NET_MAX_TCP_PAYLOAD) { | |
2270 | chain->stat.data_out_of_win++; | |
2271 | return RSC_FINAL; | |
2272 | } | |
2273 | ||
2274 | data = ((uint8_t *)n_unit->tcp) + n_unit->tcp_hdrlen; | |
2275 | if (nseq == oseq) { | |
2276 | if ((o_unit->payload == 0) && n_unit->payload) { | |
2277 | /* From no payload to payload: the normal case, not a dup ack */ | |
2278 | chain->stat.data_after_pure_ack++; | |
2279 | goto coalesce; | |
2280 | } else { | |
2281 | return virtio_net_rsc_handle_ack(chain, seg, buf, | |
2282 | n_unit->tcp, o_unit->tcp); | |
2283 | } | |
2284 | } else if ((nseq - oseq) != o_unit->payload) { | |
2285 | /* Not a consistent packet, out of order */ | |
2286 | chain->stat.data_out_of_order++; | |
2287 | return RSC_FINAL; | |
2288 | } else { | |
2289 | coalesce: | |
2290 | if ((o_ip_len + n_unit->payload) > chain->max_payload) { | |
2291 | chain->stat.over_size++; | |
2292 | return RSC_FINAL; | |
2293 | } | |
2294 | ||
2295 | /* The data is in order; the payload length field differs between | |
2296 | v4 and v6, so use the accessors to update it and record the new data len */ | |
2297 | o_unit->payload += n_unit->payload; /* update new data len */ | |
2298 | ||
2299 | /* update field in ip header */ | |
2300 | write_unit_ip_len(o_unit, o_ip_len + n_unit->payload); | |
2301 | ||
2302 | /* Carry the 'PUSH' flag over: the whql test guide says 'PUSH' can be | |
2303 | coalesced for a windows guest, while this may change the behavior for | |
2304 | a linux guest (only if it uses the RSC feature). */ | |
2305 | o_unit->tcp->th_offset_flags = n_unit->tcp->th_offset_flags; | |
2306 | ||
2307 | o_unit->tcp->th_ack = n_unit->tcp->th_ack; | |
2308 | o_unit->tcp->th_win = n_unit->tcp->th_win; | |
2309 | ||
2310 | memmove(seg->buf + seg->size, data, n_unit->payload); | |
2311 | seg->size += n_unit->payload; | |
2312 | seg->packets++; | |
2313 | chain->stat.coalesced++; | |
2314 | return RSC_COALESCE; | |
2315 | } | |
2316 | } | |
2317 | ||
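/*
 * The "(nseq - oseq) > VIRTIO_NET_MAX_TCP_PAYLOAD" style checks above
 * rely on unsigned 32-bit wraparound: the difference stays small for
 * in-order data even when the TCP sequence space rolls over.  Sketch
 * (example_seq_within_window is hypothetical):
 */
static bool example_seq_within_window(uint32_t nseq, uint32_t oseq)
{
    return (uint32_t)(nseq - oseq) <= VIRTIO_NET_MAX_TCP_PAYLOAD;
}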
2318 | static int32_t virtio_net_rsc_coalesce4(VirtioNetRscChain *chain, | |
2319 | VirtioNetRscSeg *seg, | |
2320 | const uint8_t *buf, size_t size, | |
2321 | VirtioNetRscUnit *unit) | |
2322 | { | |
2323 | struct ip_header *ip1, *ip2; | |
2324 | ||
2325 | ip1 = (struct ip_header *)(unit->ip); | |
2326 | ip2 = (struct ip_header *)(seg->unit.ip); | |
2327 | if ((ip1->ip_src ^ ip2->ip_src) || (ip1->ip_dst ^ ip2->ip_dst) | |
2328 | || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport) | |
2329 | || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) { | |
2330 | chain->stat.no_match++; | |
2331 | return RSC_NO_MATCH; | |
2332 | } | |
2333 | ||
2334 | return virtio_net_rsc_coalesce_data(chain, seg, buf, unit); | |
2335 | } | |
2336 | ||
2337 | static int32_t virtio_net_rsc_coalesce6(VirtioNetRscChain *chain, | |
2338 | VirtioNetRscSeg *seg, | |
2339 | const uint8_t *buf, size_t size, | |
2340 | VirtioNetRscUnit *unit) | |
2341 | { | |
2342 | struct ip6_header *ip1, *ip2; | |
2343 | ||
2344 | ip1 = (struct ip6_header *)(unit->ip); | |
2345 | ip2 = (struct ip6_header *)(seg->unit.ip); | |
2346 | if (memcmp(&ip1->ip6_src, &ip2->ip6_src, sizeof(struct in6_address)) | |
2347 | || memcmp(&ip1->ip6_dst, &ip2->ip6_dst, sizeof(struct in6_address)) | |
2348 | || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport) | |
2349 | || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) { | |
2350 | chain->stat.no_match++; | |
2351 | return RSC_NO_MATCH; | |
2352 | } | |
2353 | ||
2354 | return virtio_net_rsc_coalesce_data(chain, seg, buf, unit); | |
2355 | } | |
2356 | ||
2357 | /* Packets with 'SYN' bypass coalescing; packets with any other control | |
2358 | * flag are sent only after the chain is drained, to prevent reordering */ | |
2359 | static int virtio_net_rsc_tcp_ctrl_check(VirtioNetRscChain *chain, | |
2360 | struct tcp_header *tcp) | |
2361 | { | |
2362 | uint16_t tcp_hdr; | |
2363 | uint16_t tcp_flag; | |
2364 | ||
2365 | tcp_flag = htons(tcp->th_offset_flags); | |
2366 | tcp_hdr = (tcp_flag & VIRTIO_NET_TCP_HDR_LENGTH) >> 10; | |
2367 | tcp_flag &= VIRTIO_NET_TCP_FLAG; | |
2368 | if (tcp_flag & TH_SYN) { | |
2369 | chain->stat.tcp_syn++; | |
2370 | return RSC_BYPASS; | |
2371 | } | |
2372 | ||
2373 | if (tcp_flag & (TH_FIN | TH_URG | TH_RST | TH_ECE | TH_CWR)) { | |
2374 | chain->stat.tcp_ctrl_drain++; | |
2375 | return RSC_FINAL; | |
2376 | } | |
2377 | ||
2378 | if (tcp_hdr > sizeof(struct tcp_header)) { | |
2379 | chain->stat.tcp_all_opt++; | |
2380 | return RSC_FINAL; | |
2381 | } | |
2382 | ||
2383 | return RSC_CANDIDATE; | |
2384 | } | |
2385 | ||
2386 | static size_t virtio_net_rsc_do_coalesce(VirtioNetRscChain *chain, | |
2387 | NetClientState *nc, | |
2388 | const uint8_t *buf, size_t size, | |
2389 | VirtioNetRscUnit *unit) | |
2390 | { | |
2391 | int ret; | |
2392 | VirtioNetRscSeg *seg, *nseg; | |
2393 | ||
2394 | if (QTAILQ_EMPTY(&chain->buffers)) { | |
2395 | chain->stat.empty_cache++; | |
2396 | virtio_net_rsc_cache_buf(chain, nc, buf, size); | |
2397 | timer_mod(chain->drain_timer, | |
2398 | qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + chain->n->rsc_timeout); | |
2399 | return size; | |
2400 | } | |
2401 | ||
2402 | QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) { | |
2403 | if (chain->proto == ETH_P_IP) { | |
2404 | ret = virtio_net_rsc_coalesce4(chain, seg, buf, size, unit); | |
2405 | } else { | |
2406 | ret = virtio_net_rsc_coalesce6(chain, seg, buf, size, unit); | |
2407 | } | |
2408 | ||
2409 | if (ret == RSC_FINAL) { | |
2410 | if (virtio_net_rsc_drain_seg(chain, seg) == 0) { | |
2411 | /* Send failed */ | |
2412 | chain->stat.final_failed++; | |
2413 | return 0; | |
2414 | } | |
2415 | ||
2416 | /* Send current packet */ | |
2417 | return virtio_net_do_receive(nc, buf, size); | |
2418 | } else if (ret == RSC_NO_MATCH) { | |
2419 | continue; | |
2420 | } else { | |
2421 | /* Coalesced; set the flag so the ipv4 checksum gets recalculated */ | |
2422 | seg->is_coalesced = 1; | |
2423 | return size; | |
2424 | } | |
2425 | } | |
2426 | ||
2427 | chain->stat.no_match_cache++; | |
2428 | virtio_net_rsc_cache_buf(chain, nc, buf, size); | |
2429 | return size; | |
2430 | } | |
2431 | ||
2432 | /* Drain a connection's cached data; this avoids out-of-order segments */ | |
2433 | static size_t virtio_net_rsc_drain_flow(VirtioNetRscChain *chain, | |
2434 | NetClientState *nc, | |
2435 | const uint8_t *buf, size_t size, | |
2436 | uint16_t ip_start, uint16_t ip_size, | |
2437 | uint16_t tcp_port) | |
2438 | { | |
2439 | VirtioNetRscSeg *seg, *nseg; | |
2440 | uint32_t ppair1, ppair2; | |
2441 | ||
2442 | ppair1 = *(uint32_t *)(buf + tcp_port); | |
2443 | QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) { | |
2444 | ppair2 = *(uint32_t *)(seg->buf + tcp_port); | |
2445 | if (memcmp(buf + ip_start, seg->buf + ip_start, ip_size) | |
2446 | || (ppair1 != ppair2)) { | |
2447 | continue; | |
2448 | } | |
2449 | if (virtio_net_rsc_drain_seg(chain, seg) == 0) { | |
2450 | chain->stat.drain_failed++; | |
2451 | } | |
2452 | ||
2453 | break; | |
2454 | } | |
2455 | ||
2456 | return virtio_net_do_receive(nc, buf, size); | |
2457 | } | |
2458 | ||
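/*
 * The flow match above compares the adjacent 16-bit source and
 * destination TCP ports with a single 32-bit load.  A memcpy-based
 * sketch expressing the same comparison without the unaligned
 * dereference (example_ports_match is hypothetical):
 */
static bool example_ports_match(const uint8_t *a, const uint8_t *b,
                                size_t port_off)
{
    uint32_t pa, pb;

    memcpy(&pa, a + port_off, sizeof(pa));
    memcpy(&pb, b + port_off, sizeof(pb));
    return pa == pb;
}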
2459 | static int32_t virtio_net_rsc_sanity_check4(VirtioNetRscChain *chain, | |
2460 | struct ip_header *ip, | |
2461 | const uint8_t *buf, size_t size) | |
2462 | { | |
2463 | uint16_t ip_len; | |
2464 | ||
2465 | /* Not an ipv4 packet */ | |
2466 | if (((ip->ip_ver_len & 0xF0) >> 4) != IP_HEADER_VERSION_4) { | |
2467 | chain->stat.ip_option++; | |
2468 | return RSC_BYPASS; | |
2469 | } | |
2470 | ||
2471 | /* Don't handle packets with ip option */ | |
2472 | if ((ip->ip_ver_len & 0xF) != VIRTIO_NET_IP4_HEADER_LENGTH) { | |
2473 | chain->stat.ip_option++; | |
2474 | return RSC_BYPASS; | |
2475 | } | |
2476 | ||
2477 | if (ip->ip_p != IPPROTO_TCP) { | |
2478 | chain->stat.bypass_not_tcp++; | |
2479 | return RSC_BYPASS; | |
2480 | } | |
2481 | ||
2482 | /* Don't handle packets with ip fragment */ | |
2483 | if (!(htons(ip->ip_off) & IP_DF)) { | |
2484 | chain->stat.ip_frag++; | |
2485 | return RSC_BYPASS; | |
2486 | } | |
2487 | ||
2488 | /* Don't handle packets with ecn flag */ | |
2489 | if (IPTOS_ECN(ip->ip_tos)) { | |
2490 | chain->stat.ip_ecn++; | |
2491 | return RSC_BYPASS; | |
2492 | } | |
2493 | ||
2494 | ip_len = htons(ip->ip_len); | |
2495 | if (ip_len < (sizeof(struct ip_header) + sizeof(struct tcp_header)) | |
2496 | || ip_len > (size - chain->n->guest_hdr_len - | |
2497 | sizeof(struct eth_header))) { | |
2498 | chain->stat.ip_hacked++; | |
2499 | return RSC_BYPASS; | |
2500 | } | |
2501 | ||
2502 | return RSC_CANDIDATE; | |
2503 | } | |
2504 | ||
2505 | static size_t virtio_net_rsc_receive4(VirtioNetRscChain *chain, | |
2506 | NetClientState *nc, | |
2507 | const uint8_t *buf, size_t size) | |
2508 | { | |
2509 | int32_t ret; | |
2510 | uint16_t hdr_len; | |
2511 | VirtioNetRscUnit unit; | |
2512 | ||
2513 | hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len; | |
2514 | ||
2515 | if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header) | |
2516 | + sizeof(struct tcp_header))) { | |
2517 | chain->stat.bypass_not_tcp++; | |
2518 | return virtio_net_do_receive(nc, buf, size); | |
2519 | } | |
2520 | ||
2521 | virtio_net_rsc_extract_unit4(chain, buf, &unit); | |
2522 | if (virtio_net_rsc_sanity_check4(chain, unit.ip, buf, size) | |
2523 | != RSC_CANDIDATE) { | |
2524 | return virtio_net_do_receive(nc, buf, size); | |
2525 | } | |
2526 | ||
2527 | ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp); | |
2528 | if (ret == RSC_BYPASS) { | |
2529 | return virtio_net_do_receive(nc, buf, size); | |
2530 | } else if (ret == RSC_FINAL) { | |
2531 | return virtio_net_rsc_drain_flow(chain, nc, buf, size, | |
2532 | ((hdr_len + sizeof(struct eth_header)) + 12), /* ipv4 saddr offset */ | |
2533 | VIRTIO_NET_IP4_ADDR_SIZE, | |
2534 | hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header)); | |
2535 | } | |
2536 | ||
2537 | return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit); | |
2538 | } | |
2539 | ||
2540 | static int32_t virtio_net_rsc_sanity_check6(VirtioNetRscChain *chain, | |
2541 | struct ip6_header *ip6, | |
2542 | const uint8_t *buf, size_t size) | |
2543 | { | |
2544 | uint16_t ip_len; | |
2545 | ||
2546 | if (((ip6->ip6_ctlun.ip6_un1.ip6_un1_flow & 0xF0) >> 4) | |
2547 | != IP_HEADER_VERSION_6) { | |
2548 | return RSC_BYPASS; | |
2549 | } | |
2550 | ||
2551 | /* Both extension headers and the protocol are covered by this check */ | |
2552 | if (ip6->ip6_ctlun.ip6_un1.ip6_un1_nxt != IPPROTO_TCP) { | |
2553 | chain->stat.bypass_not_tcp++; | |
2554 | return RSC_BYPASS; | |
2555 | } | |
2556 | ||
2557 | ip_len = htons(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen); | |
2558 | if (ip_len < sizeof(struct tcp_header) || | |
2559 | ip_len > (size - chain->n->guest_hdr_len - sizeof(struct eth_header) | |
2560 | - sizeof(struct ip6_header))) { | |
2561 | chain->stat.ip_hacked++; | |
2562 | return RSC_BYPASS; | |
2563 | } | |
2564 | ||
2565 | /* Don't handle packets with ecn flag */ | |
2566 | if (IP6_ECN(ip6->ip6_ctlun.ip6_un3.ip6_un3_ecn)) { | |
2567 | chain->stat.ip_ecn++; | |
2568 | return RSC_BYPASS; | |
2569 | } | |
2570 | ||
2571 | return RSC_CANDIDATE; | |
2572 | } | |
2573 | ||
2574 | static size_t virtio_net_rsc_receive6(void *opq, NetClientState *nc, | |
2575 | const uint8_t *buf, size_t size) | |
2576 | { | |
2577 | int32_t ret; | |
2578 | uint16_t hdr_len; | |
2579 | VirtioNetRscChain *chain; | |
2580 | VirtioNetRscUnit unit; | |
2581 | ||
2582 | chain = opq; | |
2583 | hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len; | |
2584 | ||
2585 | if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip6_header) | |
2586 | + sizeof(struct tcp_header))) { | |
2587 | return virtio_net_do_receive(nc, buf, size); | |
2588 | } | |
2589 | ||
2590 | virtio_net_rsc_extract_unit6(chain, buf, &unit); | |
2591 | if (RSC_CANDIDATE != virtio_net_rsc_sanity_check6(chain, | |
2592 | unit.ip, buf, size)) { | |
2593 | return virtio_net_do_receive(nc, buf, size); | |
2594 | } | |
2595 | ||
2596 | ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp); | |
2597 | if (ret == RSC_BYPASS) { | |
2598 | return virtio_net_do_receive(nc, buf, size); | |
2599 | } else if (ret == RSC_FINAL) { | |
2600 | return virtio_net_rsc_drain_flow(chain, nc, buf, size, | |
2601 | ((hdr_len + sizeof(struct eth_header)) + 8), /* ipv6 saddr offset */ | |
2602 | VIRTIO_NET_IP6_ADDR_SIZE, | |
2603 | hdr_len + sizeof(struct eth_header) | |
2604 | + sizeof(struct ip6_header)); | |
2605 | } | |
2606 | ||
2607 | return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit); | |
2608 | } | |
2609 | ||
2610 | static VirtioNetRscChain *virtio_net_rsc_lookup_chain(VirtIONet *n, | |
2611 | NetClientState *nc, | |
2612 | uint16_t proto) | |
2613 | { | |
2614 | VirtioNetRscChain *chain; | |
2615 | ||
2616 | if ((proto != (uint16_t)ETH_P_IP) && (proto != (uint16_t)ETH_P_IPV6)) { | |
2617 | return NULL; | |
2618 | } | |
2619 | ||
2620 | QTAILQ_FOREACH(chain, &n->rsc_chains, next) { | |
2621 | if (chain->proto == proto) { | |
2622 | return chain; | |
2623 | } | |
2624 | } | |
2625 | ||
2626 | chain = g_malloc(sizeof(*chain)); | |
2627 | chain->n = n; | |
2628 | chain->proto = proto; | |
2629 | if (proto == (uint16_t)ETH_P_IP) { | |
2630 | chain->max_payload = VIRTIO_NET_MAX_IP4_PAYLOAD; | |
2631 | chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV4; | |
2632 | } else { | |
2633 | chain->max_payload = VIRTIO_NET_MAX_IP6_PAYLOAD; | |
2634 | chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV6; | |
2635 | } | |
2636 | chain->drain_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, | |
2637 | virtio_net_rsc_purge, chain); | |
2638 | memset(&chain->stat, 0, sizeof(chain->stat)); | |
2639 | ||
2640 | QTAILQ_INIT(&chain->buffers); | |
2641 | QTAILQ_INSERT_TAIL(&n->rsc_chains, chain, next); | |
2642 | ||
2643 | return chain; | |
2644 | } | |
2645 | ||
2646 | static ssize_t virtio_net_rsc_receive(NetClientState *nc, | |
2647 | const uint8_t *buf, | |
2648 | size_t size) | |
2649 | { | |
2650 | uint16_t proto; | |
2651 | VirtioNetRscChain *chain; | |
2652 | struct eth_header *eth; | |
2653 | VirtIONet *n; | |
2654 | ||
2655 | n = qemu_get_nic_opaque(nc); | |
2656 | if (size < (n->host_hdr_len + sizeof(struct eth_header))) { | |
2657 | return virtio_net_do_receive(nc, buf, size); | |
2658 | } | |
2659 | ||
2660 | eth = (struct eth_header *)(buf + n->guest_hdr_len); | |
2661 | proto = htons(eth->h_proto); | |
2662 | ||
2663 | chain = virtio_net_rsc_lookup_chain(n, nc, proto); | |
2664 | if (chain) { | |
2665 | chain->stat.received++; | |
2666 | if (proto == (uint16_t)ETH_P_IP && n->rsc4_enabled) { | |
2667 | return virtio_net_rsc_receive4(chain, nc, buf, size); | |
2668 | } else if (proto == (uint16_t)ETH_P_IPV6 && n->rsc6_enabled) { | |
2669 | return virtio_net_rsc_receive6(chain, nc, buf, size); | |
2670 | } | |
2671 | } | |
2672 | return virtio_net_do_receive(nc, buf, size); | |
2673 | } | |
2674 | ||
2675 | static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf, | |
2676 | size_t size) | |
2677 | { | |
2678 | VirtIONet *n = qemu_get_nic_opaque(nc); | |
2679 | if ((n->rsc4_enabled || n->rsc6_enabled)) { | |
2680 | return virtio_net_rsc_receive(nc, buf, size); | |
2681 | } else { | |
2682 | return virtio_net_do_receive(nc, buf, size); | |
2683 | } | |
2684 | } | |
2685 | ||
2686 | static int32_t virtio_net_flush_tx(VirtIONetQueue *q); | |
2687 | ||
2688 | static void virtio_net_tx_complete(NetClientState *nc, ssize_t len) | |
2689 | { | |
2690 | VirtIONet *n = qemu_get_nic_opaque(nc); | |
2691 | VirtIONetQueue *q = virtio_net_get_subqueue(nc); | |
2692 | VirtIODevice *vdev = VIRTIO_DEVICE(n); | |
2693 | int ret; | |
2694 | ||
2695 | virtqueue_push(q->tx_vq, q->async_tx.elem, 0); | |
2696 | virtio_notify(vdev, q->tx_vq); | |
2697 | ||
2698 | g_free(q->async_tx.elem); | |
2699 | q->async_tx.elem = NULL; | |
2700 | ||
2701 | virtio_queue_set_notification(q->tx_vq, 1); | |
2702 | ret = virtio_net_flush_tx(q); | |
2703 | if (ret >= n->tx_burst) { | |
2704 | /* | |
2705 | * the flush has been stopped by tx_burst | |
2706 | * we will not receive notification for the | |
2707 | * remaining part, so re-schedule | |
2708 | */ | |
2709 | virtio_queue_set_notification(q->tx_vq, 0); | |
2710 | if (q->tx_bh) { | |
2711 | replay_bh_schedule_event(q->tx_bh); | |
2712 | } else { | |
2713 | timer_mod(q->tx_timer, | |
2714 | qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout); | |
2715 | } | |
2716 | q->tx_waiting = 1; | |
2717 | } | |
2718 | } | |
2719 | ||
2720 | /* TX */ | |
2721 | static int32_t virtio_net_flush_tx(VirtIONetQueue *q) | |
2722 | { | |
2723 | VirtIONet *n = q->n; | |
2724 | VirtIODevice *vdev = VIRTIO_DEVICE(n); | |
2725 | VirtQueueElement *elem; | |
2726 | int32_t num_packets = 0; | |
2727 | int queue_index = vq2q(virtio_get_queue_index(q->tx_vq)); | |
2728 | if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) { | |
2729 | return num_packets; | |
2730 | } | |
2731 | ||
2732 | if (q->async_tx.elem) { | |
2733 | virtio_queue_set_notification(q->tx_vq, 0); | |
2734 | return num_packets; | |
2735 | } | |
2736 | ||
2737 | for (;;) { | |
2738 | ssize_t ret; | |
2739 | unsigned int out_num; | |
2740 | struct iovec sg[VIRTQUEUE_MAX_SIZE], sg2[VIRTQUEUE_MAX_SIZE + 1], *out_sg; | |
2741 | struct virtio_net_hdr vhdr; | |
2742 | ||
2743 | elem = virtqueue_pop(q->tx_vq, sizeof(VirtQueueElement)); | |
2744 | if (!elem) { | |
2745 | break; | |
2746 | } | |
2747 | ||
2748 | out_num = elem->out_num; | |
2749 | out_sg = elem->out_sg; | |
2750 | if (out_num < 1) { | |
2751 | virtio_error(vdev, "virtio-net header not in first element"); | |
2752 | goto detach; | |
2753 | } | |
2754 | ||
2755 | if (n->needs_vnet_hdr_swap) { | |
2756 | if (iov_to_buf(out_sg, out_num, 0, &vhdr, sizeof(vhdr)) < | |
2757 | sizeof(vhdr)) { | |
2758 | virtio_error(vdev, "virtio-net header incorrect"); | |
2759 | goto detach; | |
2760 | } | |
2761 | virtio_net_hdr_swap(vdev, &vhdr); | |
2762 | sg2[0].iov_base = &vhdr; | |
2763 | sg2[0].iov_len = sizeof(vhdr); | |
2764 | out_num = iov_copy(&sg2[1], ARRAY_SIZE(sg2) - 1, out_sg, out_num, | |
2765 | sizeof(vhdr), -1); | |
2766 | if (out_num == VIRTQUEUE_MAX_SIZE) { | |
2767 | goto drop; | |
2768 | } | |
2769 | out_num += 1; | |
2770 | out_sg = sg2; | |
2771 | } | |
2772 | /* | |
2773 | * If host wants to see the guest header as is, we can | |
2774 | * pass it on unchanged. Otherwise, copy just the parts | |
2775 | * that host is interested in. | |
2776 | */ | |
2777 | assert(n->host_hdr_len <= n->guest_hdr_len); | |
2778 | if (n->host_hdr_len != n->guest_hdr_len) { | |
2779 | if (iov_size(out_sg, out_num) < n->guest_hdr_len) { | |
2780 | virtio_error(vdev, "virtio-net header is invalid"); | |
2781 | goto detach; | |
2782 | } | |
2783 | unsigned sg_num = iov_copy(sg, ARRAY_SIZE(sg), | |
2784 | out_sg, out_num, | |
2785 | 0, n->host_hdr_len); | |
2786 | sg_num += iov_copy(sg + sg_num, ARRAY_SIZE(sg) - sg_num, | |
2787 | out_sg, out_num, | |
2788 | n->guest_hdr_len, -1); | |
2789 | out_num = sg_num; | |
2790 | out_sg = sg; | |
2791 | ||
2792 | if (out_num < 1) { | |
2793 | virtio_error(vdev, "virtio-net nothing to send"); | |
2794 | goto detach; | |
2795 | } | |
2796 | } | |
2797 | ||
2798 | ret = qemu_sendv_packet_async(qemu_get_subqueue(n->nic, queue_index), | |
2799 | out_sg, out_num, virtio_net_tx_complete); | |
2800 | if (ret == 0) { /* queued by the backend; resumed via virtio_net_tx_complete() */ | |
2801 | virtio_queue_set_notification(q->tx_vq, 0); | |
2802 | q->async_tx.elem = elem; | |
2803 | return -EBUSY; | |
2804 | } | |
2805 | ||
2806 | drop: | |
2807 | virtqueue_push(q->tx_vq, elem, 0); | |
2808 | virtio_notify(vdev, q->tx_vq); | |
2809 | g_free(elem); | |
2810 | ||
2811 | if (++num_packets >= n->tx_burst) { | |
2812 | break; | |
2813 | } | |
2814 | } | |
2815 | return num_packets; | |
2816 | ||
2817 | detach: | |
2818 | virtqueue_detach_element(q->tx_vq, elem, 0); | |
2819 | g_free(elem); | |
2820 | return -EINVAL; | |
2821 | } | |
2822 | ||
2823 | static void virtio_net_tx_timer(void *opaque); | |
2824 | ||
2825 | static void virtio_net_handle_tx_timer(VirtIODevice *vdev, VirtQueue *vq) | |
2826 | { | |
2827 | VirtIONet *n = VIRTIO_NET(vdev); | |
2828 | VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))]; | |
2829 | ||
2830 | if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) { | |
2831 | virtio_net_drop_tx_queue_data(vdev, vq); | |
2832 | return; | |
2833 | } | |
2834 | ||
2835 | /* This happens when device was stopped but VCPU wasn't. */ | |
2836 | if (!vdev->vm_running) { | |
2837 | q->tx_waiting = 1; | |
2838 | return; | |
2839 | } | |
2840 | ||
2841 | if (q->tx_waiting) { | |
2842 | /* We already have queued packets, immediately flush */ | |
2843 | timer_del(q->tx_timer); | |
2844 | virtio_net_tx_timer(q); | |
2845 | } else { | |
2846 | /* re-arm timer to flush it (and more) on next tick */ | |
2847 | timer_mod(q->tx_timer, | |
2848 | qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout); | |
2849 | q->tx_waiting = 1; | |
2850 | virtio_queue_set_notification(vq, 0); | |
2851 | } | |
2852 | } | |
2853 | ||
2854 | static void virtio_net_handle_tx_bh(VirtIODevice *vdev, VirtQueue *vq) | |
2855 | { | |
2856 | VirtIONet *n = VIRTIO_NET(vdev); | |
2857 | VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))]; | |
2858 | ||
2859 | if (unlikely(n->vhost_started)) { | |
2860 | return; | |
2861 | } | |
2862 | ||
2863 | if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) { | |
2864 | virtio_net_drop_tx_queue_data(vdev, vq); | |
2865 | return; | |
2866 | } | |
2867 | ||
2868 | if (unlikely(q->tx_waiting)) { | |
2869 | return; | |
2870 | } | |
2871 | q->tx_waiting = 1; | |
2872 | /* This happens when device was stopped but VCPU wasn't. */ | |
2873 | if (!vdev->vm_running) { | |
2874 | return; | |
2875 | } | |
2876 | virtio_queue_set_notification(vq, 0); | |
2877 | replay_bh_schedule_event(q->tx_bh); | |
2878 | } | |
2879 | ||
2880 | static void virtio_net_tx_timer(void *opaque) | |
2881 | { | |
2882 | VirtIONetQueue *q = opaque; | |
2883 | VirtIONet *n = q->n; | |
2884 | VirtIODevice *vdev = VIRTIO_DEVICE(n); | |
2885 | int ret; | |
2886 | ||
2887 | /* This happens when device was stopped but BH wasn't. */ | |
2888 | if (!vdev->vm_running) { | |
2889 | /* Make sure tx waiting is set, so we'll run when restarted. */ | |
2890 | assert(q->tx_waiting); | |
2891 | return; | |
2892 | } | |
2893 | ||
2894 | q->tx_waiting = 0; | |
2895 | ||
2896 | /* Just in case the driver is not ready any more */ | |
2897 | if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) { | |
2898 | return; | |
2899 | } | |
2900 | ||
2901 | ret = virtio_net_flush_tx(q); | |
2902 | if (ret == -EBUSY || ret == -EINVAL) { | |
2903 | return; | |
2904 | } | |
2905 | /* | |
2906 | * If we flush a full burst of packets, assume there are | |
2907 | * more coming and immediately rearm | |
2908 | */ | |
2909 | if (ret >= n->tx_burst) { | |
2910 | q->tx_waiting = 1; | |
2911 | timer_mod(q->tx_timer, | |
2912 | qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout); | |
2913 | return; | |
2914 | } | |
2915 | /* | |
2916 | * If less than a full burst, re-enable notification and flush | |
2917 | * anything that may have come in while we weren't looking. If | |
2918 | * we find something, assume the guest is still active and rearm | |
2919 | */ | |
2920 | virtio_queue_set_notification(q->tx_vq, 1); | |
2921 | ret = virtio_net_flush_tx(q); | |
2922 | if (ret > 0) { | |
2923 | virtio_queue_set_notification(q->tx_vq, 0); | |
2924 | q->tx_waiting = 1; | |
2925 | timer_mod(q->tx_timer, | |
2926 | qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout); | |
2927 | } | |
2928 | } | |
2929 | ||
2930 | static void virtio_net_tx_bh(void *opaque) | |
2931 | { | |
2932 | VirtIONetQueue *q = opaque; | |
2933 | VirtIONet *n = q->n; | |
2934 | VirtIODevice *vdev = VIRTIO_DEVICE(n); | |
2935 | int32_t ret; | |
2936 | ||
2937 | /* This happens when device was stopped but BH wasn't. */ | |
2938 | if (!vdev->vm_running) { | |
2939 | /* Make sure tx waiting is set, so we'll run when restarted. */ | |
2940 | assert(q->tx_waiting); | |
2941 | return; | |
2942 | } | |
2943 | ||
2944 | q->tx_waiting = 0; | |
2945 | ||
2946 | /* Just in case the driver is not ready any more */ | |
2947 | if (unlikely(!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))) { | |
2948 | return; | |
2949 | } | |
2950 | ||
2951 | ret = virtio_net_flush_tx(q); | |
2952 | if (ret == -EBUSY || ret == -EINVAL) { | |
2953 | return; /* Notification re-enable handled by tx_complete or device | |
2954 | * broken */ | |
2955 | } | |
2956 | ||
2957 | /* If we flush a full burst of packets, assume there are | |
2958 | * more coming and immediately reschedule */ | |
2959 | if (ret >= n->tx_burst) { | |
2960 | replay_bh_schedule_event(q->tx_bh); | |
2961 | q->tx_waiting = 1; | |
2962 | return; | |
2963 | } | |
2964 | ||
2965 | /* If less than a full burst, re-enable notification and flush | |
2966 | * anything that may have come in while we weren't looking. If | |
2967 | * we find something, assume the guest is still active and reschedule */ | |
2968 | virtio_queue_set_notification(q->tx_vq, 1); | |
2969 | ret = virtio_net_flush_tx(q); | |
2970 | if (ret == -EINVAL) { | |
2971 | return; | |
2972 | } else if (ret > 0) { | |
2973 | virtio_queue_set_notification(q->tx_vq, 0); | |
2974 | replay_bh_schedule_event(q->tx_bh); | |
2975 | q->tx_waiting = 1; | |
2976 | } | |
2977 | } | |
2978 | ||
2979 | static void virtio_net_add_queue(VirtIONet *n, int index) | |
2980 | { | |
2981 | VirtIODevice *vdev = VIRTIO_DEVICE(n); | |
2982 | ||
2983 | n->vqs[index].rx_vq = virtio_add_queue(vdev, n->net_conf.rx_queue_size, | |
2984 | virtio_net_handle_rx); | |
2985 | ||
2986 | if (n->net_conf.tx && !strcmp(n->net_conf.tx, "timer")) { | |
2987 | n->vqs[index].tx_vq = | |
2988 | virtio_add_queue(vdev, n->net_conf.tx_queue_size, | |
2989 | virtio_net_handle_tx_timer); | |
2990 | n->vqs[index].tx_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, | |
2991 | virtio_net_tx_timer, | |
2992 | &n->vqs[index]); | |
2993 | } else { | |
2994 | n->vqs[index].tx_vq = | |
2995 | virtio_add_queue(vdev, n->net_conf.tx_queue_size, | |
2996 | virtio_net_handle_tx_bh); | |
2997 | n->vqs[index].tx_bh = qemu_bh_new_guarded(virtio_net_tx_bh, &n->vqs[index], | |
2998 | &DEVICE(vdev)->mem_reentrancy_guard); | |
2999 | } | |
3000 | ||
3001 | n->vqs[index].tx_waiting = 0; | |
3002 | n->vqs[index].n = n; | |
3003 | } | |
3004 | ||
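/*
 * The flush strategy chosen above is driven by the "tx" device
 * property: "timer" selects the timer-based path, anything else the
 * bottom-half path.  E.g. (a sketch; only the "tx" property is taken
 * from the code above, other option names may differ):
 *
 *   -device virtio-net-pci,netdev=nd0,tx=timer
 */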
3005 | static void virtio_net_del_queue(VirtIONet *n, int index) | |
3006 | { | |
3007 | VirtIODevice *vdev = VIRTIO_DEVICE(n); | |
3008 | VirtIONetQueue *q = &n->vqs[index]; | |
3009 | NetClientState *nc = qemu_get_subqueue(n->nic, index); | |
3010 | ||
3011 | qemu_purge_queued_packets(nc); | |
3012 | ||
3013 | virtio_del_queue(vdev, index * 2); | |
3014 | if (q->tx_timer) { | |
3015 | timer_free(q->tx_timer); | |
3016 | q->tx_timer = NULL; | |
3017 | } else { | |
3018 | qemu_bh_delete(q->tx_bh); | |
3019 | q->tx_bh = NULL; | |
3020 | } | |
3021 | q->tx_waiting = 0; | |
3022 | virtio_del_queue(vdev, index * 2 + 1); | |
3023 | } | |
3024 | ||
3025 | static void virtio_net_change_num_queue_pairs(VirtIONet *n, int new_max_queue_pairs) | |
3026 | { | |
3027 | VirtIODevice *vdev = VIRTIO_DEVICE(n); | |
3028 | int old_num_queues = virtio_get_num_queues(vdev); | |
3029 | int new_num_queues = new_max_queue_pairs * 2 + 1; | |
3030 | int i; | |
3031 | ||
3032 | assert(old_num_queues >= 3); | |
3033 | assert(old_num_queues % 2 == 1); | |
3034 | ||
3035 | if (old_num_queues == new_num_queues) { | |
3036 | return; | |
3037 | } | |
3038 | ||
3039 | /* | |
3040 | * We always need to remove and add ctrl vq if | |
3041 | * old_num_queues != new_num_queues. Remove ctrl_vq first, | |
3042 | * and then we only enter one of the following two loops. | |
3043 | */ | |
3044 | virtio_del_queue(vdev, old_num_queues - 1); | |
3045 | ||
3046 | for (i = new_num_queues - 1; i < old_num_queues - 1; i += 2) { | |
3047 | /* new_num_queues < old_num_queues */ | |
3048 | virtio_net_del_queue(n, i / 2); | |
3049 | } | |
3050 | ||
3051 | for (i = old_num_queues - 1; i < new_num_queues - 1; i += 2) { | |
3052 | /* new_num_queues > old_num_queues */ | |
3053 | virtio_net_add_queue(n, i / 2); | |
3054 | } | |
3055 | ||
3056 | /* add ctrl_vq last */ | |
3057 | n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl); | |
3058 | } | |
3059 | ||
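/*
 * Virtqueue numbering assumed throughout this file: queue 2*i is RX for
 * pair i, queue 2*i + 1 is TX for pair i, and the control queue is
 * always last.  Sketch (the example_* helpers are hypothetical; vq2q()
 * used above is the inverse of the first two):
 */
static int example_rx_vq_index(int pair)        { return pair * 2; }
static int example_tx_vq_index(int pair)        { return pair * 2 + 1; }
static int example_ctrl_vq_index(int max_pairs) { return max_pairs * 2; }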
3060 | static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue) | |
3061 | { | |
3062 | int max = multiqueue ? n->max_queue_pairs : 1; | |
3063 | ||
3064 | n->multiqueue = multiqueue; | |
3065 | virtio_net_change_num_queue_pairs(n, max); | |
3066 | ||
3067 | virtio_net_set_queue_pairs(n); | |
3068 | } | |
3069 | ||
3070 | static int virtio_net_pre_load_queues(VirtIODevice *vdev) | |
3071 | { | |
3072 | virtio_net_set_multiqueue(VIRTIO_NET(vdev), | |
3073 | virtio_has_feature(vdev->guest_features, VIRTIO_NET_F_RSS) || | |
3074 | virtio_has_feature(vdev->guest_features, VIRTIO_NET_F_MQ)); | |
3075 | ||
3076 | return 0; | |
3077 | } | |
3078 | ||
3079 | static int virtio_net_post_load_device(void *opaque, int version_id) | |
3080 | { | |
3081 | VirtIONet *n = opaque; | |
3082 | VirtIODevice *vdev = VIRTIO_DEVICE(n); | |
3083 | int i, link_down; | |
3084 | ||
3085 | trace_virtio_net_post_load_device(); | |
3086 | virtio_net_set_mrg_rx_bufs(n, n->mergeable_rx_bufs, | |
3087 | virtio_vdev_has_feature(vdev, | |
3088 | VIRTIO_F_VERSION_1), | |
3089 | virtio_vdev_has_feature(vdev, | |
3090 | VIRTIO_NET_F_HASH_REPORT)); | |
3091 | ||
3092 | /* MAC_TABLE_ENTRIES may be different from the saved image */ | |
3093 | if (n->mac_table.in_use > MAC_TABLE_ENTRIES) { | |
3094 | n->mac_table.in_use = 0; | |
3095 | } | |
3096 | ||
3097 | if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) { | |
3098 | n->curr_guest_offloads = virtio_net_supported_guest_offloads(n); | |
3099 | } | |
3100 | ||
3101 | /* | |
3102 | * curr_guest_offloads will be later overwritten by the | |
3103 | * virtio_set_features_nocheck call done from the virtio_load. | |
3104 | * Here we make sure it is preserved and restored accordingly | |
3105 | * in the virtio_net_post_load_virtio callback. | |
3106 | */ | |
3107 | n->saved_guest_offloads = n->curr_guest_offloads; | |
3108 | ||
3109 | virtio_net_set_queue_pairs(n); | |
3110 | ||
3111 | /* Find the first multicast entry in the saved MAC filter */ | |
3112 | for (i = 0; i < n->mac_table.in_use; i++) { | |
3113 | if (n->mac_table.macs[i * ETH_ALEN] & 1) { | |
3114 | break; | |
3115 | } | |
3116 | } | |
3117 | n->mac_table.first_multi = i; | |
3118 | ||
3119 | /* nc.link_down can't be migrated, so infer link_down according | |
3120 | * to link status bit in n->status */ | |
3121 | link_down = (n->status & VIRTIO_NET_S_LINK_UP) == 0; | |
3122 | for (i = 0; i < n->max_queue_pairs; i++) { | |
3123 | qemu_get_subqueue(n->nic, i)->link_down = link_down; | |
3124 | } | |
3125 | ||
3126 | if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) && | |
3127 | virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) { | |
3128 | qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(), | |
3129 | QEMU_CLOCK_VIRTUAL, | |
3130 | virtio_net_announce_timer, n); | |
3131 | if (n->announce_timer.round) { | |
3132 | timer_mod(n->announce_timer.tm, | |
3133 | qemu_clock_get_ms(n->announce_timer.type)); | |
3134 | } else { | |
3135 | qemu_announce_timer_del(&n->announce_timer, false); | |
3136 | } | |
3137 | } | |
3138 | ||
3139 | virtio_net_commit_rss_config(n); | |
3140 | return 0; | |
3141 | } | |
3142 | ||
3143 | static int virtio_net_post_load_virtio(VirtIODevice *vdev) | |
3144 | { | |
3145 | VirtIONet *n = VIRTIO_NET(vdev); | |
3146 | /* | |
3147 | * The actual needed state is now in saved_guest_offloads, | |
3148 | * see virtio_net_post_load_device for detail. | |
3149 | * Restore it back and apply the desired offloads. | |
3150 | */ | |
3151 | n->curr_guest_offloads = n->saved_guest_offloads; | |
3152 | if (peer_has_vnet_hdr(n)) { | |
3153 | virtio_net_apply_guest_offloads(n); | |
3154 | } | |
3155 | ||
3156 | return 0; | |
3157 | } | |
3158 | ||
3159 | /* tx_waiting field of a VirtIONetQueue */ | |
3160 | static const VMStateDescription vmstate_virtio_net_queue_tx_waiting = { | |
3161 | .name = "virtio-net-queue-tx_waiting", | |
3162 | .fields = (const VMStateField[]) { | |
3163 | VMSTATE_UINT32(tx_waiting, VirtIONetQueue), | |
3164 | VMSTATE_END_OF_LIST() | |
3165 | }, | |
3166 | }; | |
3167 | ||
3168 | static bool max_queue_pairs_gt_1(void *opaque, int version_id) | |
3169 | { | |
3170 | return VIRTIO_NET(opaque)->max_queue_pairs > 1; | |
3171 | } | |
3172 | ||
3173 | static bool has_ctrl_guest_offloads(void *opaque, int version_id) | |
3174 | { | |
3175 | return virtio_vdev_has_feature(VIRTIO_DEVICE(opaque), | |
3176 | VIRTIO_NET_F_CTRL_GUEST_OFFLOADS); | |
3177 | } | |
3178 | ||
3179 | static bool mac_table_fits(void *opaque, int version_id) | |
3180 | { | |
3181 | return VIRTIO_NET(opaque)->mac_table.in_use <= MAC_TABLE_ENTRIES; | |
3182 | } | |
3183 | ||
3184 | static bool mac_table_doesnt_fit(void *opaque, int version_id) | |
3185 | { | |
3186 | return !mac_table_fits(opaque, version_id); | |
3187 | } | |
3188 | ||
3189 | /* This temporary type is shared by all the WITH_TMP methods | |
3190 | * although only some fields are used by each. | |
3191 | */ | |
3192 | struct VirtIONetMigTmp { | |
3193 | VirtIONet *parent; | |
3194 | VirtIONetQueue *vqs_1; | |
3195 | uint16_t curr_queue_pairs_1; | |
3196 | uint8_t has_ufo; | |
3197 | uint32_t has_vnet_hdr; | |
3198 | }; | |
3199 | ||
3200 | /* The 2nd and subsequent tx_waiting flags are loaded later than | |
3201 | * the 1st entry in the queue_pairs and only if there's more than one | |
3202 | * entry. We use the tmp mechanism to calculate a temporary | |
3203 | * pointer and count and also validate the count. | |
3204 | */ | |
3205 | ||
3206 | static int virtio_net_tx_waiting_pre_save(void *opaque) | |
3207 | { | |
3208 | struct VirtIONetMigTmp *tmp = opaque; | |
3209 | ||
3210 | tmp->vqs_1 = tmp->parent->vqs + 1; | |
3211 | tmp->curr_queue_pairs_1 = tmp->parent->curr_queue_pairs - 1; | |
3212 | if (tmp->parent->curr_queue_pairs == 0) { | |
3213 | tmp->curr_queue_pairs_1 = 0; | |
3214 | } | |
3215 | ||
3216 | return 0; | |
3217 | } | |
3218 | ||
3219 | static int virtio_net_tx_waiting_pre_load(void *opaque) | |
3220 | { | |
3221 | struct VirtIONetMigTmp *tmp = opaque; | |
3222 | ||
3223 | /* Reuse the pointer setup from save */ | |
3224 | virtio_net_tx_waiting_pre_save(opaque); | |
3225 | ||
3226 | if (tmp->parent->curr_queue_pairs > tmp->parent->max_queue_pairs) { | |
3227 | error_report("virtio-net: curr_queue_pairs %x > max_queue_pairs %x", | |
3228 | tmp->parent->curr_queue_pairs, tmp->parent->max_queue_pairs); | |
3229 | ||
3230 | return -EINVAL; | |
3231 | } | |
3232 | ||
3233 | return 0; /* all good */ | |
3234 | } | |
3235 | ||
3236 | static const VMStateDescription vmstate_virtio_net_tx_waiting = { | |
3237 | .name = "virtio-net-tx_waiting", | |
3238 | .pre_load = virtio_net_tx_waiting_pre_load, | |
3239 | .pre_save = virtio_net_tx_waiting_pre_save, | |
3240 | .fields = (const VMStateField[]) { | |
3241 | VMSTATE_STRUCT_VARRAY_POINTER_UINT16(vqs_1, struct VirtIONetMigTmp, | |
3242 | curr_queue_pairs_1, | |
3243 | vmstate_virtio_net_queue_tx_waiting, | |
3244 | struct VirtIONetQueue), | |
3245 | VMSTATE_END_OF_LIST() | |
3246 | }, | |
3247 | }; | |
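/*
 * For illustration, the WITH_TMP machinery in migration/vmstate-types.c
 * roughly does the following on load (simplified sketch; details may
 * differ between QEMU versions):
 *
 *     void *tmp = g_malloc(sizeof(struct VirtIONetMigTmp));
 *     *(void **)tmp = opaque;      // 'parent' must be the first member
 *     ret = vmstate_load_state(f, field->vmsd, tmp, field->version_id);
 *     g_free(tmp);
 *
 * The sub-vmsd's pre_load/pre_save hooks then derive vqs_1 and
 * curr_queue_pairs_1 from the parent before the varray is processed.
 */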
3248 | ||
/* The 'has_ufo' flag is only tested: if the incoming stream has it
 * set, we must check that our peer has UFO support too.
 */
3252 | static int virtio_net_ufo_post_load(void *opaque, int version_id) | |
3253 | { | |
3254 | struct VirtIONetMigTmp *tmp = opaque; | |
3255 | ||
3256 | if (tmp->has_ufo && !peer_has_ufo(tmp->parent)) { | |
3257 | error_report("virtio-net: saved image requires TUN_F_UFO support"); | |
3258 | return -EINVAL; | |
3259 | } | |
3260 | ||
3261 | return 0; | |
3262 | } | |
3263 | ||
3264 | static int virtio_net_ufo_pre_save(void *opaque) | |
3265 | { | |
3266 | struct VirtIONetMigTmp *tmp = opaque; | |
3267 | ||
3268 | tmp->has_ufo = tmp->parent->has_ufo; | |
3269 | ||
3270 | return 0; | |
3271 | } | |
3272 | ||
3273 | static const VMStateDescription vmstate_virtio_net_has_ufo = { | |
3274 | .name = "virtio-net-ufo", | |
3275 | .post_load = virtio_net_ufo_post_load, | |
3276 | .pre_save = virtio_net_ufo_pre_save, | |
3277 | .fields = (const VMStateField[]) { | |
3278 | VMSTATE_UINT8(has_ufo, struct VirtIONetMigTmp), | |
3279 | VMSTATE_END_OF_LIST() | |
3280 | }, | |
3281 | }; | |
3282 | ||
/* The 'has_vnet_hdr' flag is only tested: if the incoming stream has
 * it set, we must check that our peer supports the vnet header too.
 */
3286 | static int virtio_net_vnet_post_load(void *opaque, int version_id) | |
3287 | { | |
3288 | struct VirtIONetMigTmp *tmp = opaque; | |
3289 | ||
3290 | if (tmp->has_vnet_hdr && !peer_has_vnet_hdr(tmp->parent)) { | |
3291 | error_report("virtio-net: saved image requires vnet_hdr=on"); | |
3292 | return -EINVAL; | |
3293 | } | |
3294 | ||
3295 | return 0; | |
3296 | } | |
3297 | ||
3298 | static int virtio_net_vnet_pre_save(void *opaque) | |
3299 | { | |
3300 | struct VirtIONetMigTmp *tmp = opaque; | |
3301 | ||
3302 | tmp->has_vnet_hdr = tmp->parent->has_vnet_hdr; | |
3303 | ||
3304 | return 0; | |
3305 | } | |
3306 | ||
3307 | static const VMStateDescription vmstate_virtio_net_has_vnet = { | |
3308 | .name = "virtio-net-vnet", | |
3309 | .post_load = virtio_net_vnet_post_load, | |
3310 | .pre_save = virtio_net_vnet_pre_save, | |
3311 | .fields = (const VMStateField[]) { | |
3312 | VMSTATE_UINT32(has_vnet_hdr, struct VirtIONetMigTmp), | |
3313 | VMSTATE_END_OF_LIST() | |
3314 | }, | |
3315 | }; | |
3316 | ||
3317 | static bool virtio_net_rss_needed(void *opaque) | |
3318 | { | |
3319 | return VIRTIO_NET(opaque)->rss_data.enabled; | |
3320 | } | |
3321 | ||
3322 | static const VMStateDescription vmstate_virtio_net_rss = { | |
3323 | .name = "virtio-net-device/rss", | |
3324 | .version_id = 1, | |
3325 | .minimum_version_id = 1, | |
3326 | .needed = virtio_net_rss_needed, | |
3327 | .fields = (const VMStateField[]) { | |
3328 | VMSTATE_BOOL(rss_data.enabled, VirtIONet), | |
3329 | VMSTATE_BOOL(rss_data.redirect, VirtIONet), | |
3330 | VMSTATE_BOOL(rss_data.populate_hash, VirtIONet), | |
3331 | VMSTATE_UINT32(rss_data.hash_types, VirtIONet), | |
3332 | VMSTATE_UINT16(rss_data.indirections_len, VirtIONet), | |
3333 | VMSTATE_UINT16(rss_data.default_queue, VirtIONet), | |
3334 | VMSTATE_UINT8_ARRAY(rss_data.key, VirtIONet, | |
3335 | VIRTIO_NET_RSS_MAX_KEY_SIZE), | |
3336 | VMSTATE_VARRAY_UINT16_ALLOC(rss_data.indirections_table, VirtIONet, | |
3337 | rss_data.indirections_len, 0, | |
3338 | vmstate_info_uint16, uint16_t), | |
3339 | VMSTATE_END_OF_LIST() | |
3340 | }, | |
3341 | }; | |
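/*
 * Being a subsection, this state is only put on the wire when
 * virtio_net_rss_needed() returns true, i.e. when RSS is enabled;
 * streams from sources without RSS simply omit it and the destination
 * keeps its reset-time defaults.
 */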
3342 | ||
3343 | static struct vhost_dev *virtio_net_get_vhost(VirtIODevice *vdev) | |
3344 | { | |
3345 | VirtIONet *n = VIRTIO_NET(vdev); | |
3346 | NetClientState *nc; | |
3347 | struct vhost_net *net; | |
3348 | ||
3349 | if (!n->nic) { | |
3350 | return NULL; | |
3351 | } | |
3352 | ||
3353 | nc = qemu_get_queue(n->nic); | |
3354 | if (!nc) { | |
3355 | return NULL; | |
3356 | } | |
3357 | ||
3358 | net = get_vhost_net(nc->peer); | |
3359 | if (!net) { | |
3360 | return NULL; | |
3361 | } | |
3362 | ||
3363 | return &net->dev; | |
3364 | } | |
3365 | ||
3366 | static int vhost_user_net_save_state(QEMUFile *f, void *pv, size_t size, | |
3367 | const VMStateField *field, | |
3368 | JSONWriter *vmdesc) | |
3369 | { | |
3370 | VirtIONet *n = pv; | |
3371 | VirtIODevice *vdev = VIRTIO_DEVICE(n); | |
3372 | struct vhost_dev *vhdev; | |
3373 | Error *local_error = NULL; | |
3374 | int ret; | |
3375 | ||
3376 | vhdev = virtio_net_get_vhost(vdev); | |
3377 | if (vhdev == NULL) { | |
        /* local_error is still NULL here, so report the error directly */
        error_report("Error getting vhost back-end of %s device %s",
                     vdev->name, vdev->parent_obj.canonical_path);
3381 | return -1; | |
3382 | } | |
3383 | ||
3384 | ret = vhost_save_backend_state(vhdev, f, &local_error); | |
3385 | if (ret < 0) { | |
3386 | error_reportf_err(local_error, | |
3387 | "Error saving back-end state of %s device %s: ", | |
3388 | vdev->name, vdev->parent_obj.canonical_path); | |
3389 | return ret; | |
3390 | } | |
3391 | ||
3392 | return 0; | |
3393 | } | |
3394 | ||
3395 | static int vhost_user_net_load_state(QEMUFile *f, void *pv, size_t size, | |
3396 | const VMStateField *field) | |
3397 | { | |
3398 | VirtIONet *n = pv; | |
3399 | VirtIODevice *vdev = VIRTIO_DEVICE(n); | |
3400 | struct vhost_dev *vhdev; | |
3401 | Error *local_error = NULL; | |
3402 | int ret; | |
3403 | ||
3404 | vhdev = virtio_net_get_vhost(vdev); | |
3405 | if (vhdev == NULL) { | |
        /* local_error is still NULL here, so report the error directly */
        error_report("Error getting vhost back-end of %s device %s",
                     vdev->name, vdev->parent_obj.canonical_path);
3409 | return -1; | |
3410 | } | |
3411 | ||
3412 | ret = vhost_load_backend_state(vhdev, f, &local_error); | |
3413 | if (ret < 0) { | |
3414 | error_reportf_err(local_error, | |
3415 | "Error loading back-end state of %s device %s: ", | |
3416 | vdev->name, vdev->parent_obj.canonical_path); | |
3417 | return ret; | |
3418 | } | |
3419 | ||
3420 | return 0; | |
3421 | } | |
3422 | ||
3423 | static bool vhost_user_net_is_internal_migration(void *opaque) | |
3424 | { | |
3425 | VirtIONet *n = opaque; | |
3426 | VirtIODevice *vdev = VIRTIO_DEVICE(n); | |
3427 | struct vhost_dev *vhdev; | |
3428 | ||
3429 | vhdev = virtio_net_get_vhost(vdev); | |
3430 | if (vhdev == NULL) { | |
3431 | return false; | |
3432 | } | |
3433 | ||
3434 | return vhost_supports_device_state(vhdev); | |
3435 | } | |
3436 | ||
3437 | static const VMStateDescription vhost_user_net_backend_state = { | |
3438 | .name = "virtio-net-device/backend", | |
3439 | .version_id = 0, | |
3440 | .needed = vhost_user_net_is_internal_migration, | |
3441 | .fields = (const VMStateField[]) { | |
3442 | { | |
3443 | .name = "backend", | |
3444 | .info = &(const VMStateInfo) { | |
3445 | .name = "virtio-net vhost-user backend state", | |
3446 | .get = vhost_user_net_load_state, | |
3447 | .put = vhost_user_net_save_state, | |
3448 | }, | |
3449 | }, | |
3450 | VMSTATE_END_OF_LIST() | |
3451 | } | |
3452 | }; | |
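/*
 * The custom VMStateInfo above is what lets an opaque, variable-sized
 * blob travel inside a vmstate: .put/.get stream the vhost-user
 * back-end state through the migration QEMUFile, while .needed gates
 * the whole subsection on the back-end advertising device-state
 * support.
 */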
3453 | ||
3454 | static const VMStateDescription vmstate_virtio_net_device = { | |
3455 | .name = "virtio-net-device", | |
3456 | .version_id = VIRTIO_NET_VM_VERSION, | |
3457 | .minimum_version_id = VIRTIO_NET_VM_VERSION, | |
3458 | .post_load = virtio_net_post_load_device, | |
3459 | .fields = (const VMStateField[]) { | |
3460 | VMSTATE_UINT8_ARRAY(mac, VirtIONet, ETH_ALEN), | |
3461 | VMSTATE_STRUCT_POINTER(vqs, VirtIONet, | |
3462 | vmstate_virtio_net_queue_tx_waiting, | |
3463 | VirtIONetQueue), | |
3464 | VMSTATE_UINT32(mergeable_rx_bufs, VirtIONet), | |
3465 | VMSTATE_UINT16(status, VirtIONet), | |
3466 | VMSTATE_UINT8(promisc, VirtIONet), | |
3467 | VMSTATE_UINT8(allmulti, VirtIONet), | |
3468 | VMSTATE_UINT32(mac_table.in_use, VirtIONet), | |
3469 | ||
        /* Guarded pair: if it fits we load it, else we throw it away
         * - this can happen if the source has a larger MAC table; post-load
         * sets the overflow flags in this case.
         */
3474 | VMSTATE_VBUFFER_MULTIPLY(mac_table.macs, VirtIONet, | |
3475 | 0, mac_table_fits, mac_table.in_use, | |
3476 | ETH_ALEN), | |
3477 | VMSTATE_UNUSED_VARRAY_UINT32(VirtIONet, mac_table_doesnt_fit, 0, | |
3478 | mac_table.in_use, ETH_ALEN), | |
3479 | ||
        /* Note: this is an array of uint32_t that has always been saved as a
         * raw buffer, so beware of host endianness; it is really a VLAN
         * bitmap addressed in uint32_t units, i.e. bit (vid & 0x1f) of
         * word (vid >> 5).
         */
3484 | VMSTATE_BUFFER_POINTER_UNSAFE(vlans, VirtIONet, 0, MAX_VLAN >> 3), | |
3485 | VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp, | |
3486 | vmstate_virtio_net_has_vnet), | |
3487 | VMSTATE_UINT8(mac_table.multi_overflow, VirtIONet), | |
3488 | VMSTATE_UINT8(mac_table.uni_overflow, VirtIONet), | |
3489 | VMSTATE_UINT8(alluni, VirtIONet), | |
3490 | VMSTATE_UINT8(nomulti, VirtIONet), | |
3491 | VMSTATE_UINT8(nouni, VirtIONet), | |
3492 | VMSTATE_UINT8(nobcast, VirtIONet), | |
3493 | VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp, | |
3494 | vmstate_virtio_net_has_ufo), | |
3495 | VMSTATE_SINGLE_TEST(max_queue_pairs, VirtIONet, max_queue_pairs_gt_1, 0, | |
3496 | vmstate_info_uint16_equal, uint16_t), | |
3497 | VMSTATE_UINT16_TEST(curr_queue_pairs, VirtIONet, max_queue_pairs_gt_1), | |
3498 | VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp, | |
3499 | vmstate_virtio_net_tx_waiting), | |
3500 | VMSTATE_UINT64_TEST(curr_guest_offloads, VirtIONet, | |
3501 | has_ctrl_guest_offloads), | |
3502 | VMSTATE_END_OF_LIST() | |
3503 | }, | |
3504 | .subsections = (const VMStateDescription * const []) { | |
3505 | &vmstate_virtio_net_rss, | |
3506 | &vhost_user_net_backend_state, | |
3507 | NULL | |
3508 | } | |
3509 | }; | |
3510 | ||
3511 | static NetClientInfo net_virtio_info = { | |
3512 | .type = NET_CLIENT_DRIVER_NIC, | |
3513 | .size = sizeof(NICState), | |
3514 | .can_receive = virtio_net_can_receive, | |
3515 | .receive = virtio_net_receive, | |
3516 | .link_status_changed = virtio_net_set_link_status, | |
3517 | .query_rx_filter = virtio_net_query_rxfilter, | |
3518 | .announce = virtio_net_announce, | |
3519 | }; | |
3520 | ||
3521 | static bool virtio_net_guest_notifier_pending(VirtIODevice *vdev, int idx) | |
3522 | { | |
3523 | VirtIONet *n = VIRTIO_NET(vdev); | |
3524 | NetClientState *nc; | |
3525 | assert(n->vhost_started); | |
3526 | if (!n->multiqueue && idx == 2) { | |
        /* Must guard against an invalid feature set and a bogus queue index
         * being set by a malicious guest, or slipping in through a buggy
         * migration stream.
         */
3531 | if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) { | |
3532 | qemu_log_mask(LOG_GUEST_ERROR, | |
3533 | "%s: bogus vq index ignored\n", __func__); | |
3534 | return false; | |
3535 | } | |
3536 | nc = qemu_get_subqueue(n->nic, n->max_queue_pairs); | |
3537 | } else { | |
3538 | nc = qemu_get_subqueue(n->nic, vq2q(idx)); | |
3539 | } | |
    /*
     * Check for the config interrupt: VIRTIO_CONFIG_IRQ_IDX (-1) is the
     * index reserved for it.  If the back-end does not support the config
     * interrupt, the helper returns false.
     */
3545 | ||
3546 | if (idx == VIRTIO_CONFIG_IRQ_IDX) { | |
3547 | return vhost_net_config_pending(get_vhost_net(nc->peer)); | |
3548 | } | |
3549 | return vhost_net_virtqueue_pending(get_vhost_net(nc->peer), idx); | |
3550 | } | |
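/*
 * Index mapping used above, assuming vq2q(idx) == idx / 2: queue pair q
 * owns RX virtqueue 2 * q and TX virtqueue 2 * q + 1, and the control
 * virtqueue comes after all data queues.  With multiqueue off, vq index
 * 2 is therefore the control vq, whose notifier state lives on the
 * subqueue at max_queue_pairs.
 */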
3551 | ||
3552 | static void virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx, | |
3553 | bool mask) | |
3554 | { | |
3555 | VirtIONet *n = VIRTIO_NET(vdev); | |
3556 | NetClientState *nc; | |
3557 | assert(n->vhost_started); | |
3558 | if (!n->multiqueue && idx == 2) { | |
        /* Must guard against an invalid feature set and a bogus queue index
         * being set by a malicious guest, or slipping in through a buggy
         * migration stream.
         */
3563 | if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) { | |
3564 | qemu_log_mask(LOG_GUEST_ERROR, | |
3565 | "%s: bogus vq index ignored\n", __func__); | |
3566 | return; | |
3567 | } | |
3568 | nc = qemu_get_subqueue(n->nic, n->max_queue_pairs); | |
3569 | } else { | |
3570 | nc = qemu_get_subqueue(n->nic, vq2q(idx)); | |
3571 | } | |
    /*
     * Check for the config interrupt: VIRTIO_CONFIG_IRQ_IDX (-1) is the
     * index reserved for it.  If the back-end does not support the config
     * interrupt, the helper simply returns.
     */
3577 | ||
3578 | if (idx == VIRTIO_CONFIG_IRQ_IDX) { | |
3579 | vhost_net_config_mask(get_vhost_net(nc->peer), vdev, mask); | |
3580 | return; | |
3581 | } | |
3582 | vhost_net_virtqueue_mask(get_vhost_net(nc->peer), vdev, idx, mask); | |
3583 | } | |
3584 | ||
3585 | static void virtio_net_set_config_size(VirtIONet *n, uint64_t host_features) | |
3586 | { | |
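    /*
     * The mac field sits at the start of the config space and is
     * always present regardless of negotiation, so force
     * VIRTIO_NET_F_MAC on when computing the config size.
     */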
3587 | virtio_add_feature(&host_features, VIRTIO_NET_F_MAC); | |
3588 | ||
3589 | n->config_size = virtio_get_config_size(&cfg_size_params, host_features); | |
3590 | } | |
3591 | ||
3592 | void virtio_net_set_netclient_name(VirtIONet *n, const char *name, | |
3593 | const char *type) | |
3594 | { | |
    /*
     * The name may be NULL; in that case the netclient name will be type.x.
     */
3598 | assert(type != NULL); | |
3599 | ||
3600 | g_free(n->netclient_name); | |
3601 | g_free(n->netclient_type); | |
3602 | n->netclient_name = g_strdup(name); | |
3603 | n->netclient_type = g_strdup(type); | |
3604 | } | |
3605 | ||
3606 | static bool failover_unplug_primary(VirtIONet *n, DeviceState *dev) | |
3607 | { | |
3608 | HotplugHandler *hotplug_ctrl; | |
3609 | PCIDevice *pci_dev; | |
3610 | Error *err = NULL; | |
3611 | ||
3612 | hotplug_ctrl = qdev_get_hotplug_handler(dev); | |
3613 | if (hotplug_ctrl) { | |
3614 | pci_dev = PCI_DEVICE(dev); | |
3615 | pci_dev->partially_hotplugged = true; | |
3616 | hotplug_handler_unplug_request(hotplug_ctrl, dev, &err); | |
3617 | if (err) { | |
3618 | error_report_err(err); | |
3619 | return false; | |
3620 | } | |
3621 | } else { | |
3622 | return false; | |
3623 | } | |
3624 | return true; | |
3625 | } | |
3626 | ||
3627 | static bool failover_replug_primary(VirtIONet *n, DeviceState *dev, | |
3628 | Error **errp) | |
3629 | { | |
3630 | Error *err = NULL; | |
3631 | HotplugHandler *hotplug_ctrl; | |
3632 | PCIDevice *pdev = PCI_DEVICE(dev); | |
3633 | BusState *primary_bus; | |
3634 | ||
3635 | if (!pdev->partially_hotplugged) { | |
3636 | return true; | |
3637 | } | |
3638 | primary_bus = dev->parent_bus; | |
3639 | if (!primary_bus) { | |
3640 | error_setg(errp, "virtio_net: couldn't find primary bus"); | |
3641 | return false; | |
3642 | } | |
3643 | qdev_set_parent_bus(dev, primary_bus, &error_abort); | |
3644 | qatomic_set(&n->failover_primary_hidden, false); | |
3645 | hotplug_ctrl = qdev_get_hotplug_handler(dev); | |
3646 | if (hotplug_ctrl) { | |
3647 | hotplug_handler_pre_plug(hotplug_ctrl, dev, &err); | |
3648 | if (err) { | |
3649 | goto out; | |
3650 | } | |
3651 | hotplug_handler_plug(hotplug_ctrl, dev, &err); | |
3652 | } | |
3653 | pdev->partially_hotplugged = false; | |
3654 | ||
3655 | out: | |
3656 | error_propagate(errp, err); | |
3657 | return !err; | |
3658 | } | |
3659 | ||
3660 | static void virtio_net_handle_migration_primary(VirtIONet *n, MigrationEvent *e) | |
3661 | { | |
3662 | bool should_be_hidden; | |
3663 | Error *err = NULL; | |
3664 | DeviceState *dev = failover_find_primary_device(n); | |
3665 | ||
3666 | if (!dev) { | |
3667 | return; | |
3668 | } | |
3669 | ||
3670 | should_be_hidden = qatomic_read(&n->failover_primary_hidden); | |
3671 | ||
3672 | if (e->type == MIG_EVENT_PRECOPY_SETUP && !should_be_hidden) { | |
3673 | if (failover_unplug_primary(n, dev)) { | |
3674 | vmstate_unregister(VMSTATE_IF(dev), qdev_get_vmsd(dev), dev); | |
3675 | qapi_event_send_unplug_primary(dev->id); | |
3676 | qatomic_set(&n->failover_primary_hidden, true); | |
3677 | } else { | |
3678 | warn_report("couldn't unplug primary device"); | |
3679 | } | |
3680 | } else if (e->type == MIG_EVENT_PRECOPY_FAILED) { | |
    /* We already unplugged the device; let's plug it back */
3682 | if (!failover_replug_primary(n, dev, &err)) { | |
3683 | if (err) { | |
3684 | error_report_err(err); | |
3685 | } | |
3686 | } | |
3687 | } | |
3688 | } | |
3689 | ||
3690 | static int virtio_net_migration_state_notifier(NotifierWithReturn *notifier, | |
3691 | MigrationEvent *e, Error **errp) | |
3692 | { | |
3693 | VirtIONet *n = container_of(notifier, VirtIONet, migration_state); | |
3694 | virtio_net_handle_migration_primary(n, e); | |
3695 | return 0; | |
3696 | } | |
3697 | ||
3698 | static bool failover_hide_primary_device(DeviceListener *listener, | |
3699 | const QDict *device_opts, | |
3700 | bool from_json, | |
3701 | Error **errp) | |
3702 | { | |
3703 | VirtIONet *n = container_of(listener, VirtIONet, primary_listener); | |
3704 | const char *standby_id; | |
3705 | ||
3706 | if (!device_opts) { | |
3707 | return false; | |
3708 | } | |
3709 | ||
3710 | if (!qdict_haskey(device_opts, "failover_pair_id")) { | |
3711 | return false; | |
3712 | } | |
3713 | ||
3714 | if (!qdict_haskey(device_opts, "id")) { | |
3715 | error_setg(errp, "Device with failover_pair_id needs to have id"); | |
3716 | return false; | |
3717 | } | |
3718 | ||
3719 | standby_id = qdict_get_str(device_opts, "failover_pair_id"); | |
3720 | if (g_strcmp0(standby_id, n->netclient_name) != 0) { | |
3721 | return false; | |
3722 | } | |
3723 | ||
    /*
     * The hide helper can be called several times for a given device.
     * Check that there is only one primary per virtio-net device, but
     * don't clone the qdict again when it is called for the same device.
     */
3730 | if (n->primary_opts) { | |
3731 | const char *old, *new; | |
3732 | /* devices with failover_pair_id always have an id */ | |
3733 | old = qdict_get_str(n->primary_opts, "id"); | |
3734 | new = qdict_get_str(device_opts, "id"); | |
3735 | if (strcmp(old, new) != 0) { | |
3736 | error_setg(errp, "Cannot attach more than one primary device to " | |
3737 | "'%s': '%s' and '%s'", n->netclient_name, old, new); | |
3738 | return false; | |
3739 | } | |
3740 | } else { | |
3741 | n->primary_opts = qdict_clone_shallow(device_opts); | |
3742 | n->primary_opts_from_json = from_json; | |
3743 | } | |
3744 | ||
3745 | /* failover_primary_hidden is set during feature negotiation */ | |
3746 | return qatomic_read(&n->failover_primary_hidden); | |
3747 | } | |
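/*
 * For illustration, a typical failover pair on the command line (IDs
 * and the host address are examples only):
 *
 *     -device virtio-net-pci,netdev=hostnet1,id=net1,failover=on
 *     -device vfio-pci,host=5e:00.2,id=hostdev0,failover_pair_id=net1
 *
 * The hide helper above matches the primary's failover_pair_id against
 * the standby virtio-net device's netclient name.
 */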
3748 | ||
3749 | static void virtio_net_device_realize(DeviceState *dev, Error **errp) | |
3750 | { | |
3751 | VirtIODevice *vdev = VIRTIO_DEVICE(dev); | |
3752 | VirtIONet *n = VIRTIO_NET(dev); | |
3753 | NetClientState *nc; | |
3754 | int i; | |
3755 | ||
3756 | if (n->net_conf.mtu) { | |
3757 | n->host_features |= (1ULL << VIRTIO_NET_F_MTU); | |
3758 | } | |
3759 | ||
3760 | if (n->net_conf.duplex_str) { | |
3761 | if (strncmp(n->net_conf.duplex_str, "half", 5) == 0) { | |
3762 | n->net_conf.duplex = DUPLEX_HALF; | |
3763 | } else if (strncmp(n->net_conf.duplex_str, "full", 5) == 0) { | |
3764 | n->net_conf.duplex = DUPLEX_FULL; | |
3765 | } else { | |
3766 | error_setg(errp, "'duplex' must be 'half' or 'full'"); | |
3767 | return; | |
3768 | } | |
3769 | n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX); | |
3770 | } else { | |
3771 | n->net_conf.duplex = DUPLEX_UNKNOWN; | |
3772 | } | |
3773 | ||
3774 | if (n->net_conf.speed < SPEED_UNKNOWN) { | |
3775 | error_setg(errp, "'speed' must be between 0 and INT_MAX"); | |
3776 | return; | |
3777 | } | |
3778 | if (n->net_conf.speed >= 0) { | |
3779 | n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX); | |
3780 | } | |
3781 | ||
3782 | if (n->failover) { | |
3783 | n->primary_listener.hide_device = failover_hide_primary_device; | |
3784 | qatomic_set(&n->failover_primary_hidden, true); | |
3785 | device_listener_register(&n->primary_listener); | |
3786 | migration_add_notifier(&n->migration_state, | |
3787 | virtio_net_migration_state_notifier); | |
3788 | n->host_features |= (1ULL << VIRTIO_NET_F_STANDBY); | |
3789 | } | |
3790 | ||
3791 | virtio_net_set_config_size(n, n->host_features); | |
3792 | virtio_init(vdev, VIRTIO_ID_NET, n->config_size); | |
3793 | ||
    /*
     * We set the lower limit on the RX queue size to its historical
     * default.  Guests that want a smaller ring can always resize it
     * without help from us (with virtio 1 and up).
     */
3799 | if (n->net_conf.rx_queue_size < VIRTIO_NET_RX_QUEUE_MIN_SIZE || | |
3800 | n->net_conf.rx_queue_size > VIRTQUEUE_MAX_SIZE || | |
3801 | !is_power_of_2(n->net_conf.rx_queue_size)) { | |
3802 | error_setg(errp, "Invalid rx_queue_size (= %" PRIu16 "), " | |
3803 | "must be a power of 2 between %d and %d.", | |
3804 | n->net_conf.rx_queue_size, VIRTIO_NET_RX_QUEUE_MIN_SIZE, | |
3805 | VIRTQUEUE_MAX_SIZE); | |
3806 | virtio_cleanup(vdev); | |
3807 | return; | |
3808 | } | |
3809 | ||
3810 | if (n->net_conf.tx_queue_size < VIRTIO_NET_TX_QUEUE_MIN_SIZE || | |
3811 | n->net_conf.tx_queue_size > virtio_net_max_tx_queue_size(n) || | |
3812 | !is_power_of_2(n->net_conf.tx_queue_size)) { | |
3813 | error_setg(errp, "Invalid tx_queue_size (= %" PRIu16 "), " | |
3814 | "must be a power of 2 between %d and %d", | |
3815 | n->net_conf.tx_queue_size, VIRTIO_NET_TX_QUEUE_MIN_SIZE, | |
3816 | virtio_net_max_tx_queue_size(n)); | |
3817 | virtio_cleanup(vdev); | |
3818 | return; | |
3819 | } | |
3820 | ||
3821 | n->max_ncs = MAX(n->nic_conf.peers.queues, 1); | |
3822 | ||
    /*
     * Figure out the number of datapath queue pairs, since the backend
     * could provide a control queue via its peers as well.
     */
3827 | if (n->nic_conf.peers.queues) { | |
3828 | for (i = 0; i < n->max_ncs; i++) { | |
3829 | if (n->nic_conf.peers.ncs[i]->is_datapath) { | |
3830 | ++n->max_queue_pairs; | |
3831 | } | |
3832 | } | |
3833 | } | |
3834 | n->max_queue_pairs = MAX(n->max_queue_pairs, 1); | |
3835 | ||
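    /*
     * Each queue pair needs an RX and a TX virtqueue, plus one control
     * virtqueue for the whole device, hence the 2 * pairs + 1 bound.
     */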
3836 | if (n->max_queue_pairs * 2 + 1 > VIRTIO_QUEUE_MAX) { | |
3837 | error_setg(errp, "Invalid number of queue pairs (= %" PRIu32 "), " | |
3838 | "must be a positive integer less than %d.", | |
3839 | n->max_queue_pairs, (VIRTIO_QUEUE_MAX - 1) / 2); | |
3840 | virtio_cleanup(vdev); | |
3841 | return; | |
3842 | } | |
3843 | n->vqs = g_new0(VirtIONetQueue, n->max_queue_pairs); | |
3844 | n->curr_queue_pairs = 1; | |
3845 | n->tx_timeout = n->net_conf.txtimer; | |
3846 | ||
3847 | if (n->net_conf.tx && strcmp(n->net_conf.tx, "timer") | |
3848 | && strcmp(n->net_conf.tx, "bh")) { | |
3849 | warn_report("virtio-net: " | |
3850 | "Unknown option tx=%s, valid options: \"timer\" \"bh\"", | |
3851 | n->net_conf.tx); | |
3852 | error_printf("Defaulting to \"bh\""); | |
3853 | } | |
3854 | ||
3855 | n->net_conf.tx_queue_size = MIN(virtio_net_max_tx_queue_size(n), | |
3856 | n->net_conf.tx_queue_size); | |
3857 | ||
3858 | virtio_net_add_queue(n, 0); | |
3859 | ||
3860 | n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl); | |
3861 | qemu_macaddr_default_if_unset(&n->nic_conf.macaddr); | |
3862 | memcpy(&n->mac[0], &n->nic_conf.macaddr, sizeof(n->mac)); | |
3863 | n->status = VIRTIO_NET_S_LINK_UP; | |
3864 | qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(), | |
3865 | QEMU_CLOCK_VIRTUAL, | |
3866 | virtio_net_announce_timer, n); | |
3867 | n->announce_timer.round = 0; | |
3868 | ||
3869 | if (n->netclient_type) { | |
        /*
         * This happens when virtio_net_set_netclient_name() has been called.
         */
3873 | n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf, | |
3874 | n->netclient_type, n->netclient_name, | |
3875 | &dev->mem_reentrancy_guard, n); | |
3876 | } else { | |
3877 | n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf, | |
3878 | object_get_typename(OBJECT(dev)), dev->id, | |
3879 | &dev->mem_reentrancy_guard, n); | |
3880 | } | |
3881 | ||
3882 | for (i = 0; i < n->max_queue_pairs; i++) { | |
3883 | n->nic->ncs[i].do_not_pad = true; | |
3884 | } | |
3885 | ||
3886 | peer_test_vnet_hdr(n); | |
3887 | if (peer_has_vnet_hdr(n)) { | |
3888 | n->host_hdr_len = sizeof(struct virtio_net_hdr); | |
3889 | } else { | |
3890 | n->host_hdr_len = 0; | |
3891 | } | |
3892 | ||
3893 | qemu_format_nic_info_str(qemu_get_queue(n->nic), n->nic_conf.macaddr.a); | |
3894 | ||
3895 | n->vqs[0].tx_waiting = 0; | |
3896 | n->tx_burst = n->net_conf.txburst; | |
3897 | virtio_net_set_mrg_rx_bufs(n, 0, 0, 0); | |
3898 | n->promisc = 1; /* for compatibility */ | |
3899 | ||
3900 | n->mac_table.macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN); | |
3901 | ||
3902 | n->vlans = g_malloc0(MAX_VLAN >> 3); | |
3903 | ||
3904 | nc = qemu_get_queue(n->nic); | |
3905 | nc->rxfilter_notify_enabled = 1; | |
3906 | ||
3907 | if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) { | |
3908 | struct virtio_net_config netcfg = {}; | |
3909 | memcpy(&netcfg.mac, &n->nic_conf.macaddr, ETH_ALEN); | |
3910 | vhost_net_set_config(get_vhost_net(nc->peer), | |
3911 | (uint8_t *)&netcfg, 0, ETH_ALEN, VHOST_SET_CONFIG_TYPE_FRONTEND); | |
3912 | } | |
3913 | QTAILQ_INIT(&n->rsc_chains); | |
3914 | n->qdev = dev; | |
3915 | ||
3916 | net_rx_pkt_init(&n->rx_pkt); | |
3917 | ||
3918 | if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS)) { | |
3919 | virtio_net_load_ebpf(n, errp); | |
3920 | } | |
3921 | } | |
3922 | ||
3923 | static void virtio_net_device_unrealize(DeviceState *dev) | |
3924 | { | |
3925 | VirtIODevice *vdev = VIRTIO_DEVICE(dev); | |
3926 | VirtIONet *n = VIRTIO_NET(dev); | |
3927 | int i, max_queue_pairs; | |
3928 | ||
3929 | if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS)) { | |
3930 | virtio_net_unload_ebpf(n); | |
3931 | } | |
3932 | ||
    /* This will stop the vhost backend if appropriate. */
3934 | virtio_net_set_status(vdev, 0); | |
3935 | ||
3936 | g_free(n->netclient_name); | |
3937 | n->netclient_name = NULL; | |
3938 | g_free(n->netclient_type); | |
3939 | n->netclient_type = NULL; | |
3940 | ||
3941 | g_free(n->mac_table.macs); | |
3942 | g_free(n->vlans); | |
3943 | ||
3944 | if (n->failover) { | |
3945 | qobject_unref(n->primary_opts); | |
3946 | device_listener_unregister(&n->primary_listener); | |
3947 | migration_remove_notifier(&n->migration_state); | |
3948 | } else { | |
3949 | assert(n->primary_opts == NULL); | |
3950 | } | |
3951 | ||
3952 | max_queue_pairs = n->multiqueue ? n->max_queue_pairs : 1; | |
3953 | for (i = 0; i < max_queue_pairs; i++) { | |
3954 | virtio_net_del_queue(n, i); | |
3955 | } | |
    /* also delete the control vq */
3957 | virtio_del_queue(vdev, max_queue_pairs * 2); | |
3958 | qemu_announce_timer_del(&n->announce_timer, false); | |
3959 | g_free(n->vqs); | |
3960 | qemu_del_nic(n->nic); | |
3961 | virtio_net_rsc_cleanup(n); | |
3962 | g_free(n->rss_data.indirections_table); | |
3963 | net_rx_pkt_uninit(n->rx_pkt); | |
3964 | virtio_cleanup(vdev); | |
3965 | } | |
3966 | ||
3967 | static void virtio_net_reset(VirtIODevice *vdev) | |
3968 | { | |
3969 | VirtIONet *n = VIRTIO_NET(vdev); | |
3970 | int i; | |
3971 | ||
3972 | /* Reset back to compatibility mode */ | |
3973 | n->promisc = 1; | |
3974 | n->allmulti = 0; | |
3975 | n->alluni = 0; | |
3976 | n->nomulti = 0; | |
3977 | n->nouni = 0; | |
3978 | n->nobcast = 0; | |
3979 | /* multiqueue is disabled by default */ | |
3980 | n->curr_queue_pairs = 1; | |
3981 | timer_del(n->announce_timer.tm); | |
3982 | n->announce_timer.round = 0; | |
3983 | n->status &= ~VIRTIO_NET_S_ANNOUNCE; | |
3984 | ||
3985 | /* Flush any MAC and VLAN filter table state */ | |
3986 | n->mac_table.in_use = 0; | |
3987 | n->mac_table.first_multi = 0; | |
3988 | n->mac_table.multi_overflow = 0; | |
3989 | n->mac_table.uni_overflow = 0; | |
3990 | memset(n->mac_table.macs, 0, MAC_TABLE_ENTRIES * ETH_ALEN); | |
3991 | memcpy(&n->mac[0], &n->nic->conf->macaddr, sizeof(n->mac)); | |
3992 | qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac); | |
3993 | memset(n->vlans, 0, MAX_VLAN >> 3); | |
3994 | ||
3995 | /* Flush any async TX */ | |
3996 | for (i = 0; i < n->max_queue_pairs; i++) { | |
3997 | flush_or_purge_queued_packets(qemu_get_subqueue(n->nic, i)); | |
3998 | } | |
3999 | ||
4000 | virtio_net_disable_rss(n); | |
4001 | } | |
4002 | ||
4003 | static void virtio_net_instance_init(Object *obj) | |
4004 | { | |
4005 | VirtIONet *n = VIRTIO_NET(obj); | |
4006 | ||
4007 | /* | |
4008 | * The default config_size is sizeof(struct virtio_net_config). | |
4009 | * Can be overridden with virtio_net_set_config_size. | |
4010 | */ | |
4011 | n->config_size = sizeof(struct virtio_net_config); | |
4012 | device_add_bootindex_property(obj, &n->nic_conf.bootindex, | |
4013 | "bootindex", "/ethernet-phy@0", | |
4014 | DEVICE(n)); | |
4015 | ||
4016 | ebpf_rss_init(&n->ebpf_rss); | |
4017 | } | |
4018 | ||
4019 | static int virtio_net_pre_save(void *opaque) | |
4020 | { | |
4021 | VirtIONet *n = opaque; | |
4022 | ||
    /* At this point the backend must be stopped; otherwise
     * it might keep writing to guest memory. */
4025 | assert(!n->vhost_started); | |
4026 | ||
4027 | return 0; | |
4028 | } | |
4029 | ||
4030 | static bool primary_unplug_pending(void *opaque) | |
4031 | { | |
4032 | DeviceState *dev = opaque; | |
4033 | DeviceState *primary; | |
4034 | VirtIODevice *vdev = VIRTIO_DEVICE(dev); | |
4035 | VirtIONet *n = VIRTIO_NET(vdev); | |
4036 | ||
4037 | if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_STANDBY)) { | |
4038 | return false; | |
4039 | } | |
4040 | primary = failover_find_primary_device(n); | |
4041 | return primary ? primary->pending_deleted_event : false; | |
4042 | } | |
4043 | ||
4044 | static bool dev_unplug_pending(void *opaque) | |
4045 | { | |
4046 | DeviceState *dev = opaque; | |
4047 | VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev); | |
4048 | ||
4049 | return vdc->primary_unplug_pending(dev); | |
4050 | } | |
4051 | ||
4052 | static const VMStateDescription vmstate_virtio_net = { | |
4053 | .name = "virtio-net", | |
4054 | .minimum_version_id = VIRTIO_NET_VM_VERSION, | |
4055 | .version_id = VIRTIO_NET_VM_VERSION, | |
4056 | .fields = (const VMStateField[]) { | |
4057 | VMSTATE_VIRTIO_DEVICE, | |
4058 | VMSTATE_END_OF_LIST() | |
4059 | }, | |
4060 | .pre_save = virtio_net_pre_save, | |
4061 | .dev_unplug_pending = dev_unplug_pending, | |
4062 | }; | |
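/*
 * Note the two-level split: dc->vmsd points at this wrapper, whose
 * VMSTATE_VIRTIO_DEVICE field drives the common virtio save/load code,
 * which in turn processes vdc->vmsd (vmstate_virtio_net_device above)
 * for the device-specific state.
 */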
4063 | ||
4064 | static const Property virtio_net_properties[] = { | |
4065 | DEFINE_PROP_BIT64("csum", VirtIONet, host_features, | |
4066 | VIRTIO_NET_F_CSUM, true), | |
4067 | DEFINE_PROP_BIT64("guest_csum", VirtIONet, host_features, | |
4068 | VIRTIO_NET_F_GUEST_CSUM, true), | |
4069 | DEFINE_PROP_BIT64("gso", VirtIONet, host_features, VIRTIO_NET_F_GSO, true), | |
4070 | DEFINE_PROP_BIT64("guest_tso4", VirtIONet, host_features, | |
4071 | VIRTIO_NET_F_GUEST_TSO4, true), | |
4072 | DEFINE_PROP_BIT64("guest_tso6", VirtIONet, host_features, | |
4073 | VIRTIO_NET_F_GUEST_TSO6, true), | |
4074 | DEFINE_PROP_BIT64("guest_ecn", VirtIONet, host_features, | |
4075 | VIRTIO_NET_F_GUEST_ECN, true), | |
4076 | DEFINE_PROP_BIT64("guest_ufo", VirtIONet, host_features, | |
4077 | VIRTIO_NET_F_GUEST_UFO, true), | |
4078 | DEFINE_PROP_BIT64("guest_announce", VirtIONet, host_features, | |
4079 | VIRTIO_NET_F_GUEST_ANNOUNCE, true), | |
4080 | DEFINE_PROP_BIT64("host_tso4", VirtIONet, host_features, | |
4081 | VIRTIO_NET_F_HOST_TSO4, true), | |
4082 | DEFINE_PROP_BIT64("host_tso6", VirtIONet, host_features, | |
4083 | VIRTIO_NET_F_HOST_TSO6, true), | |
4084 | DEFINE_PROP_BIT64("host_ecn", VirtIONet, host_features, | |
4085 | VIRTIO_NET_F_HOST_ECN, true), | |
4086 | DEFINE_PROP_BIT64("host_ufo", VirtIONet, host_features, | |
4087 | VIRTIO_NET_F_HOST_UFO, true), | |
4088 | DEFINE_PROP_BIT64("mrg_rxbuf", VirtIONet, host_features, | |
4089 | VIRTIO_NET_F_MRG_RXBUF, true), | |
4090 | DEFINE_PROP_BIT64("status", VirtIONet, host_features, | |
4091 | VIRTIO_NET_F_STATUS, true), | |
4092 | DEFINE_PROP_BIT64("ctrl_vq", VirtIONet, host_features, | |
4093 | VIRTIO_NET_F_CTRL_VQ, true), | |
4094 | DEFINE_PROP_BIT64("ctrl_rx", VirtIONet, host_features, | |
4095 | VIRTIO_NET_F_CTRL_RX, true), | |
4096 | DEFINE_PROP_BIT64("ctrl_vlan", VirtIONet, host_features, | |
4097 | VIRTIO_NET_F_CTRL_VLAN, true), | |
4098 | DEFINE_PROP_BIT64("ctrl_rx_extra", VirtIONet, host_features, | |
4099 | VIRTIO_NET_F_CTRL_RX_EXTRA, true), | |
4100 | DEFINE_PROP_BIT64("ctrl_mac_addr", VirtIONet, host_features, | |
4101 | VIRTIO_NET_F_CTRL_MAC_ADDR, true), | |
4102 | DEFINE_PROP_BIT64("ctrl_guest_offloads", VirtIONet, host_features, | |
4103 | VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, true), | |
4104 | DEFINE_PROP_BIT64("mq", VirtIONet, host_features, VIRTIO_NET_F_MQ, false), | |
4105 | DEFINE_PROP_BIT64("rss", VirtIONet, host_features, | |
4106 | VIRTIO_NET_F_RSS, false), | |
4107 | DEFINE_PROP_BIT64("hash", VirtIONet, host_features, | |
4108 | VIRTIO_NET_F_HASH_REPORT, false), | |
4109 | DEFINE_PROP_ARRAY("ebpf-rss-fds", VirtIONet, nr_ebpf_rss_fds, | |
4110 | ebpf_rss_fds, qdev_prop_string, char*), | |
4111 | DEFINE_PROP_BIT64("guest_rsc_ext", VirtIONet, host_features, | |
4112 | VIRTIO_NET_F_RSC_EXT, false), | |
4113 | DEFINE_PROP_UINT32("rsc_interval", VirtIONet, rsc_timeout, | |
4114 | VIRTIO_NET_RSC_DEFAULT_INTERVAL), | |
4115 | DEFINE_NIC_PROPERTIES(VirtIONet, nic_conf), | |
4116 | DEFINE_PROP_UINT32("x-txtimer", VirtIONet, net_conf.txtimer, | |
4117 | TX_TIMER_INTERVAL), | |
4118 | DEFINE_PROP_INT32("x-txburst", VirtIONet, net_conf.txburst, TX_BURST), | |
4119 | DEFINE_PROP_STRING("tx", VirtIONet, net_conf.tx), | |
4120 | DEFINE_PROP_UINT16("rx_queue_size", VirtIONet, net_conf.rx_queue_size, | |
4121 | VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE), | |
4122 | DEFINE_PROP_UINT16("tx_queue_size", VirtIONet, net_conf.tx_queue_size, | |
4123 | VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE), | |
4124 | DEFINE_PROP_UINT16("host_mtu", VirtIONet, net_conf.mtu, 0), | |
4125 | DEFINE_PROP_BOOL("x-mtu-bypass-backend", VirtIONet, mtu_bypass_backend, | |
4126 | true), | |
4127 | DEFINE_PROP_INT32("speed", VirtIONet, net_conf.speed, SPEED_UNKNOWN), | |
4128 | DEFINE_PROP_STRING("duplex", VirtIONet, net_conf.duplex_str), | |
4129 | DEFINE_PROP_BOOL("failover", VirtIONet, failover, false), | |
4130 | DEFINE_PROP_BIT64("guest_uso4", VirtIONet, host_features, | |
4131 | VIRTIO_NET_F_GUEST_USO4, true), | |
4132 | DEFINE_PROP_BIT64("guest_uso6", VirtIONet, host_features, | |
4133 | VIRTIO_NET_F_GUEST_USO6, true), | |
4134 | DEFINE_PROP_BIT64("host_uso", VirtIONet, host_features, | |
4135 | VIRTIO_NET_F_HOST_USO, true), | |
4136 | }; | |
4137 | ||
4138 | static void virtio_net_class_init(ObjectClass *klass, const void *data) | |
4139 | { | |
4140 | DeviceClass *dc = DEVICE_CLASS(klass); | |
4141 | VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass); | |
4142 | ||
4143 | device_class_set_props(dc, virtio_net_properties); | |
4144 | dc->vmsd = &vmstate_virtio_net; | |
4145 | set_bit(DEVICE_CATEGORY_NETWORK, dc->categories); | |
4146 | vdc->realize = virtio_net_device_realize; | |
4147 | vdc->unrealize = virtio_net_device_unrealize; | |
4148 | vdc->get_config = virtio_net_get_config; | |
4149 | vdc->set_config = virtio_net_set_config; | |
4150 | vdc->get_features = virtio_net_get_features; | |
4151 | vdc->set_features = virtio_net_set_features; | |
4152 | vdc->bad_features = virtio_net_bad_features; | |
4153 | vdc->reset = virtio_net_reset; | |
4154 | vdc->queue_reset = virtio_net_queue_reset; | |
4155 | vdc->queue_enable = virtio_net_queue_enable; | |
4156 | vdc->set_status = virtio_net_set_status; | |
4157 | vdc->guest_notifier_mask = virtio_net_guest_notifier_mask; | |
4158 | vdc->guest_notifier_pending = virtio_net_guest_notifier_pending; | |
4159 | vdc->legacy_features |= (0x1 << VIRTIO_NET_F_GSO); | |
4160 | vdc->pre_load_queues = virtio_net_pre_load_queues; | |
4161 | vdc->post_load = virtio_net_post_load_virtio; | |
4162 | vdc->vmsd = &vmstate_virtio_net_device; | |
4163 | vdc->primary_unplug_pending = primary_unplug_pending; | |
4164 | vdc->get_vhost = virtio_net_get_vhost; | |
4165 | vdc->toggle_device_iotlb = vhost_toggle_device_iotlb; | |
4166 | } | |
4167 | ||
4168 | static const TypeInfo virtio_net_info = { | |
4169 | .name = TYPE_VIRTIO_NET, | |
4170 | .parent = TYPE_VIRTIO_DEVICE, | |
4171 | .instance_size = sizeof(VirtIONet), | |
4172 | .instance_init = virtio_net_instance_init, | |
4173 | .class_init = virtio_net_class_init, | |
4174 | }; | |
4175 | ||
4176 | static void virtio_register_types(void) | |
4177 | { | |
4178 | type_register_static(&virtio_net_info); | |
4179 | } | |
4180 | ||
4181 | type_init(virtio_register_types) |