/*
 * Virtio Network Device
 *
 * Copyright IBM, Corp. 2007
 *
 * Authors:
 *  Anthony Liguori   <aliguori@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 */

#include "qemu/osdep.h"
#include "qemu/iov.h"
#include "hw/virtio/virtio.h"
#include "net/net.h"
#include "net/checksum.h"
#include "net/tap.h"
#include "qemu/error-report.h"
#include "qemu/timer.h"
#include "hw/virtio/virtio-net.h"
#include "net/vhost_net.h"
#include "hw/virtio/virtio-bus.h"
#include "qapi/error.h"
#include "qapi/qapi-events-net.h"
#include "hw/virtio/virtio-access.h"
#include "migration/misc.h"
#include "standard-headers/linux/ethtool.h"

#define VIRTIO_NET_VM_VERSION    11

#define MAC_TABLE_ENTRIES    64
#define MAX_VLAN    (1 << 12)   /* Per 802.1Q definition */

/* previously fixed value */
#define VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE 256
#define VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE 256

/* for now, only allow larger queues; with virtio-1, guest can downsize */
#define VIRTIO_NET_RX_QUEUE_MIN_SIZE VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE
#define VIRTIO_NET_TX_QUEUE_MIN_SIZE VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE

#define VIRTIO_NET_IP4_ADDR_SIZE   8        /* ipv4 saddr + daddr */

#define VIRTIO_NET_TCP_FLAG         0x3F
#define VIRTIO_NET_TCP_HDR_LENGTH   0xF000

/* IPv4 max payload, 16 bits in the header */
#define VIRTIO_NET_MAX_IP4_PAYLOAD (65535 - sizeof(struct ip_header))
#define VIRTIO_NET_MAX_TCP_PAYLOAD 65535

/* header length value in ip header without option */
#define VIRTIO_NET_IP4_HEADER_LENGTH 5

#define VIRTIO_NET_IP6_ADDR_SIZE   32      /* ipv6 saddr + daddr */
#define VIRTIO_NET_MAX_IP6_PAYLOAD VIRTIO_NET_MAX_TCP_PAYLOAD

/* Purge coalesced packets timer interval.  This value affects performance a
   lot and should be tuned carefully: '300000' (300us) is the recommended
   value to pass the WHQL test, while '50000' can gain 2x netperf throughput
   with tso/gso/gro 'off'. */
#define VIRTIO_NET_RSC_DEFAULT_INTERVAL 300000

/* temporary until the standard headers include it */
#if !defined(VIRTIO_NET_HDR_F_RSC_INFO)

#define VIRTIO_NET_HDR_F_RSC_INFO  4 /* rsc_ext data in csum_ fields */
#define VIRTIO_NET_F_RSC_EXT       61

static inline __virtio16 *virtio_net_rsc_ext_num_packets(
    struct virtio_net_hdr *hdr)
{
    return &hdr->csum_start;
}

static inline __virtio16 *virtio_net_rsc_ext_num_dupacks(
    struct virtio_net_hdr *hdr)
{
    return &hdr->csum_offset;
}

#endif

/*
 * Calculate the number of bytes up to and including the given 'field' of
 * 'container'.
 */
#define endof(container, field) \
    (offsetof(container, field) + sizeof_field(container, field))

typedef struct VirtIOFeature {
    uint64_t flags;
    size_t end;
} VirtIOFeature;

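/*
 * Each entry maps a feature bit to the last config-space byte that becomes
 * valid when that feature is offered; the table is consulted when sizing
 * the device config space.
 */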
static VirtIOFeature feature_sizes[] = {
    {.flags = 1ULL << VIRTIO_NET_F_MAC,
     .end = endof(struct virtio_net_config, mac)},
    {.flags = 1ULL << VIRTIO_NET_F_STATUS,
     .end = endof(struct virtio_net_config, status)},
    {.flags = 1ULL << VIRTIO_NET_F_MQ,
     .end = endof(struct virtio_net_config, max_virtqueue_pairs)},
    {.flags = 1ULL << VIRTIO_NET_F_MTU,
     .end = endof(struct virtio_net_config, mtu)},
    {.flags = 1ULL << VIRTIO_NET_F_SPEED_DUPLEX,
     .end = endof(struct virtio_net_config, duplex)},
    {}
};

static VirtIONetQueue *virtio_net_get_subqueue(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);

    return &n->vqs[nc->queue_index];
}

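/*
 * Virtqueues come in RX/TX pairs (vq 2*i is the receive queue and 2*i + 1
 * the transmit queue of pair i, with the control queue last), so a
 * virtqueue index divided by two gives its queue pair index.
 */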
static int vq2q(int queue_index)
{
    return queue_index / 2;
}

/* TODO
 * - we could suppress RX interrupt if we were so inclined.
 */

static void virtio_net_get_config(VirtIODevice *vdev, uint8_t *config)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    struct virtio_net_config netcfg;

    virtio_stw_p(vdev, &netcfg.status, n->status);
    virtio_stw_p(vdev, &netcfg.max_virtqueue_pairs, n->max_queues);
    virtio_stw_p(vdev, &netcfg.mtu, n->net_conf.mtu);
    memcpy(netcfg.mac, n->mac, ETH_ALEN);
    virtio_stl_p(vdev, &netcfg.speed, n->net_conf.speed);
    netcfg.duplex = n->net_conf.duplex;
    memcpy(config, &netcfg, n->config_size);
}

static void virtio_net_set_config(VirtIODevice *vdev, const uint8_t *config)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    struct virtio_net_config netcfg = {};

    memcpy(&netcfg, config, n->config_size);

    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR) &&
        !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1) &&
        memcmp(netcfg.mac, n->mac, ETH_ALEN)) {
        memcpy(n->mac, netcfg.mac, ETH_ALEN);
        qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
    }
}

static bool virtio_net_started(VirtIONet *n, uint8_t status)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    return (status & VIRTIO_CONFIG_S_DRIVER_OK) &&
        (n->status & VIRTIO_NET_S_LINK_UP) && vdev->vm_running;
}

static void virtio_net_announce_timer(void *opaque)
{
    VirtIONet *n = opaque;
    VirtIODevice *vdev = VIRTIO_DEVICE(n);

    n->announce_counter--;
    n->status |= VIRTIO_NET_S_ANNOUNCE;
    virtio_notify_config(vdev);
}

static void virtio_net_vhost_status(VirtIONet *n, uint8_t status)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    NetClientState *nc = qemu_get_queue(n->nic);
    int queues = n->multiqueue ? n->max_queues : 1;

    if (!get_vhost_net(nc->peer)) {
        return;
    }

    if ((virtio_net_started(n, status) && !nc->peer->link_down) ==
        !!n->vhost_started) {
        return;
    }
    if (!n->vhost_started) {
        int r, i;

        if (n->needs_vnet_hdr_swap) {
            error_report("backend does not support %s vnet headers; "
                         "falling back on userspace virtio",
                         virtio_is_big_endian(vdev) ? "BE" : "LE");
            return;
        }

        /* Any packets outstanding? Purge them to avoid touching rings
         * when vhost is running.
         */
        for (i = 0; i < queues; i++) {
            NetClientState *qnc = qemu_get_subqueue(n->nic, i);

            /* Purge both directions: TX and RX. */
            qemu_net_queue_purge(qnc->peer->incoming_queue, qnc);
            qemu_net_queue_purge(qnc->incoming_queue, qnc->peer);
        }

        if (virtio_has_feature(vdev->guest_features, VIRTIO_NET_F_MTU)) {
            r = vhost_net_set_mtu(get_vhost_net(nc->peer), n->net_conf.mtu);
            if (r < 0) {
                error_report("%uBytes MTU not supported by the backend",
                             n->net_conf.mtu);

                return;
            }
        }

        n->vhost_started = 1;
        r = vhost_net_start(vdev, n->nic->ncs, queues);
        if (r < 0) {
            error_report("unable to start vhost net: %d: "
                         "falling back on userspace virtio", -r);
            n->vhost_started = 0;
        }
    } else {
        vhost_net_stop(vdev, n->nic->ncs, queues);
        n->vhost_started = 0;
    }
}

static int virtio_net_set_vnet_endian_one(VirtIODevice *vdev,
                                          NetClientState *peer,
                                          bool enable)
{
    if (virtio_is_big_endian(vdev)) {
        return qemu_set_vnet_be(peer, enable);
    } else {
        return qemu_set_vnet_le(peer, enable);
    }
}

static bool virtio_net_set_vnet_endian(VirtIODevice *vdev, NetClientState *ncs,
                                       int queues, bool enable)
{
    int i;

    for (i = 0; i < queues; i++) {
        if (virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, enable) < 0 &&
            enable) {
            while (--i >= 0) {
                virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, false);
            }

            return true;
        }
    }

    return false;
}

static void virtio_net_vnet_endian_status(VirtIONet *n, uint8_t status)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    int queues = n->multiqueue ? n->max_queues : 1;

    if (virtio_net_started(n, status)) {
        /* Before using the device, we tell the network backend about the
         * endianness to use when parsing vnet headers. If the backend
         * can't do it, we fall back to fixing the headers in the core
         * virtio-net code.
         */
        n->needs_vnet_hdr_swap = virtio_net_set_vnet_endian(vdev, n->nic->ncs,
                                                            queues, true);
    } else if (virtio_net_started(n, vdev->status)) {
        /* After using the device, we need to reset the network backend to
         * the default (guest native endianness), otherwise the guest may
         * lose network connectivity if it is rebooted into a different
         * endianness.
         */
        virtio_net_set_vnet_endian(vdev, n->nic->ncs, queues, false);
    }
}

static void virtio_net_drop_tx_queue_data(VirtIODevice *vdev, VirtQueue *vq)
{
    unsigned int dropped = virtqueue_drop_all(vq);
    if (dropped) {
        virtio_notify(vdev, vq);
    }
}

static void virtio_net_set_status(struct VirtIODevice *vdev, uint8_t status)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    VirtIONetQueue *q;
    int i;
    uint8_t queue_status;

    virtio_net_vnet_endian_status(n, status);
    virtio_net_vhost_status(n, status);

    for (i = 0; i < n->max_queues; i++) {
        NetClientState *ncs = qemu_get_subqueue(n->nic, i);
        bool queue_started;
        q = &n->vqs[i];

        if ((!n->multiqueue && i != 0) || i >= n->curr_queues) {
            queue_status = 0;
        } else {
            queue_status = status;
        }
        queue_started =
            virtio_net_started(n, queue_status) && !n->vhost_started;

        if (queue_started) {
            qemu_flush_queued_packets(ncs);
        }

        if (!q->tx_waiting) {
            continue;
        }

        if (queue_started) {
            if (q->tx_timer) {
                timer_mod(q->tx_timer,
                          qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
            } else {
                qemu_bh_schedule(q->tx_bh);
            }
        } else {
            if (q->tx_timer) {
                timer_del(q->tx_timer);
            } else {
                qemu_bh_cancel(q->tx_bh);
            }
            if ((n->status & VIRTIO_NET_S_LINK_UP) == 0 &&
                (queue_status & VIRTIO_CONFIG_S_DRIVER_OK) &&
                vdev->vm_running) {
                /* if tx is waiting we likely have some packets in the tx
                 * queue and notification disabled */
                q->tx_waiting = 0;
                virtio_queue_set_notification(q->tx_vq, 1);
                virtio_net_drop_tx_queue_data(vdev, q->tx_vq);
            }
        }
    }
}

static void virtio_net_set_link_status(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint16_t old_status = n->status;

    if (nc->link_down)
        n->status &= ~VIRTIO_NET_S_LINK_UP;
    else
        n->status |= VIRTIO_NET_S_LINK_UP;

    if (n->status != old_status)
        virtio_notify_config(vdev);

    virtio_net_set_status(vdev, vdev->status);
}

static void rxfilter_notify(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);

    if (nc->rxfilter_notify_enabled) {
        gchar *path = object_get_canonical_path(OBJECT(n->qdev));
        qapi_event_send_nic_rx_filter_changed(!!n->netclient_name,
                                              n->netclient_name, path);
        g_free(path);

        /* disable event notification to avoid events flooding */
        nc->rxfilter_notify_enabled = 0;
    }
}

static intList *get_vlan_table(VirtIONet *n)
{
    intList *list, *entry;
    int i, j;

    list = NULL;
    for (i = 0; i < MAX_VLAN >> 5; i++) {
        for (j = 0; n->vlans[i] && j <= 0x1f; j++) {
            if (n->vlans[i] & (1U << j)) {
                entry = g_malloc0(sizeof(*entry));
                entry->value = (i << 5) + j;
                entry->next = list;
                list = entry;
            }
        }
    }

    return list;
}

static RxFilterInfo *virtio_net_query_rxfilter(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    RxFilterInfo *info;
    strList *str_list, *entry;
    int i;

    info = g_malloc0(sizeof(*info));
    info->name = g_strdup(nc->name);
    info->promiscuous = n->promisc;

    if (n->nouni) {
        info->unicast = RX_STATE_NONE;
    } else if (n->alluni) {
        info->unicast = RX_STATE_ALL;
    } else {
        info->unicast = RX_STATE_NORMAL;
    }

    if (n->nomulti) {
        info->multicast = RX_STATE_NONE;
    } else if (n->allmulti) {
        info->multicast = RX_STATE_ALL;
    } else {
        info->multicast = RX_STATE_NORMAL;
    }

    info->broadcast_allowed = n->nobcast;
    info->multicast_overflow = n->mac_table.multi_overflow;
    info->unicast_overflow = n->mac_table.uni_overflow;

    info->main_mac = qemu_mac_strdup_printf(n->mac);

    str_list = NULL;
    for (i = 0; i < n->mac_table.first_multi; i++) {
        entry = g_malloc0(sizeof(*entry));
        entry->value = qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN);
        entry->next = str_list;
        str_list = entry;
    }
    info->unicast_table = str_list;

    str_list = NULL;
    for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
        entry = g_malloc0(sizeof(*entry));
        entry->value = qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN);
        entry->next = str_list;
        str_list = entry;
    }
    info->multicast_table = str_list;
    info->vlan_table = get_vlan_table(n);

    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VLAN)) {
        info->vlan = RX_STATE_ALL;
    } else if (!info->vlan_table) {
        info->vlan = RX_STATE_NONE;
    } else {
        info->vlan = RX_STATE_NORMAL;
    }

    /* enable event notification after query */
    nc->rxfilter_notify_enabled = 1;

    return info;
}

static void virtio_net_reset(VirtIODevice *vdev)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    int i;

    /* Reset back to compatibility mode */
    n->promisc = 1;
    n->allmulti = 0;
    n->alluni = 0;
    n->nomulti = 0;
    n->nouni = 0;
    n->nobcast = 0;
    /* multiqueue is disabled by default */
    n->curr_queues = 1;
    timer_del(n->announce_timer);
    n->announce_counter = 0;
    n->status &= ~VIRTIO_NET_S_ANNOUNCE;

    /* Flush any MAC and VLAN filter table state */
    n->mac_table.in_use = 0;
    n->mac_table.first_multi = 0;
    n->mac_table.multi_overflow = 0;
    n->mac_table.uni_overflow = 0;
    memset(n->mac_table.macs, 0, MAC_TABLE_ENTRIES * ETH_ALEN);
    memcpy(&n->mac[0], &n->nic->conf->macaddr, sizeof(n->mac));
    qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
    memset(n->vlans, 0, MAX_VLAN >> 3);

    /* Flush any async TX */
    for (i = 0; i < n->max_queues; i++) {
        NetClientState *nc = qemu_get_subqueue(n->nic, i);

        if (nc->peer) {
            qemu_flush_or_purge_queued_packets(nc->peer, true);
            assert(!virtio_net_get_subqueue(nc)->async_tx.elem);
        }
    }
}

static void peer_test_vnet_hdr(VirtIONet *n)
{
    NetClientState *nc = qemu_get_queue(n->nic);
    if (!nc->peer) {
        return;
    }

    n->has_vnet_hdr = qemu_has_vnet_hdr(nc->peer);
}

static int peer_has_vnet_hdr(VirtIONet *n)
{
    return n->has_vnet_hdr;
}

static int peer_has_ufo(VirtIONet *n)
{
    if (!peer_has_vnet_hdr(n))
        return 0;

    n->has_ufo = qemu_has_ufo(qemu_get_queue(n->nic)->peer);

    return n->has_ufo;
}

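/*
 * Pick the header layout exposed to the guest: VIRTIO_F_VERSION_1 always
 * implies the larger virtio_net_hdr_mrg_rxbuf, while legacy devices use it
 * only when mergeable RX buffers were negotiated.  If the peer can produce
 * that header length itself, host and guest header lengths are kept equal.
 */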
static void virtio_net_set_mrg_rx_bufs(VirtIONet *n, int mergeable_rx_bufs,
                                       int version_1)
{
    int i;
    NetClientState *nc;

    n->mergeable_rx_bufs = mergeable_rx_bufs;

    if (version_1) {
        n->guest_hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
    } else {
        n->guest_hdr_len = n->mergeable_rx_bufs ?
            sizeof(struct virtio_net_hdr_mrg_rxbuf) :
            sizeof(struct virtio_net_hdr);
    }

    for (i = 0; i < n->max_queues; i++) {
        nc = qemu_get_subqueue(n->nic, i);

        if (peer_has_vnet_hdr(n) &&
            qemu_has_vnet_hdr_len(nc->peer, n->guest_hdr_len)) {
            qemu_set_vnet_hdr_len(nc->peer, n->guest_hdr_len);
            n->host_hdr_len = n->guest_hdr_len;
        }
    }
}

static int virtio_net_max_tx_queue_size(VirtIONet *n)
{
    NetClientState *peer = n->nic_conf.peers.ncs[0];

    /*
     * Backends other than vhost-user don't support max queue size.
     */
    if (!peer) {
        return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
    }

    if (peer->info->type != NET_CLIENT_DRIVER_VHOST_USER) {
        return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
    }

    return VIRTQUEUE_MAX_SIZE;
}

static int peer_attach(VirtIONet *n, int index)
{
    NetClientState *nc = qemu_get_subqueue(n->nic, index);

    if (!nc->peer) {
        return 0;
    }

    if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
        vhost_set_vring_enable(nc->peer, 1);
    }

    if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) {
        return 0;
    }

    if (n->max_queues == 1) {
        return 0;
    }

    return tap_enable(nc->peer);
}

static int peer_detach(VirtIONet *n, int index)
{
    NetClientState *nc = qemu_get_subqueue(n->nic, index);

    if (!nc->peer) {
        return 0;
    }

    if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
        vhost_set_vring_enable(nc->peer, 0);
    }

    if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) {
        return 0;
    }

    return tap_disable(nc->peer);
}

static void virtio_net_set_queues(VirtIONet *n)
{
    int i;
    int r;

    if (n->nic->peer_deleted) {
        return;
    }

    for (i = 0; i < n->max_queues; i++) {
        if (i < n->curr_queues) {
            r = peer_attach(n, i);
            assert(!r);
        } else {
            r = peer_detach(n, i);
            assert(!r);
        }
    }
}

static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue);

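/*
 * Feature negotiation: start from the host_features property, always offer
 * VIRTIO_NET_F_MAC, clear the offloads the peer cannot provide, and let the
 * vhost backend mask out anything it does not support.
 */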
static uint64_t virtio_net_get_features(VirtIODevice *vdev, uint64_t features,
                                        Error **errp)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    NetClientState *nc = qemu_get_queue(n->nic);

    /* Firstly sync all virtio-net possible supported features */
    features |= n->host_features;

    virtio_add_feature(&features, VIRTIO_NET_F_MAC);

    if (!peer_has_vnet_hdr(n)) {
        virtio_clear_feature(&features, VIRTIO_NET_F_CSUM);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO4);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO6);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_ECN);

        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_CSUM);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO4);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO6);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_ECN);
    }

    if (!peer_has_vnet_hdr(n) || !peer_has_ufo(n)) {
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_UFO);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_UFO);
    }

    if (!get_vhost_net(nc->peer)) {
        return features;
    }

    features = vhost_net_get_features(get_vhost_net(nc->peer), features);
    vdev->backend_features = features;

    if (n->mtu_bypass_backend &&
            (n->host_features & 1ULL << VIRTIO_NET_F_MTU)) {
        features |= (1ULL << VIRTIO_NET_F_MTU);
    }

    return features;
}

static uint64_t virtio_net_bad_features(VirtIODevice *vdev)
{
    uint64_t features = 0;

    /* Linux kernel 2.6.25.  It understood MAC (as everyone must),
     * but also these: */
    virtio_add_feature(&features, VIRTIO_NET_F_MAC);
    virtio_add_feature(&features, VIRTIO_NET_F_CSUM);
    virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO4);
    virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO6);
    virtio_add_feature(&features, VIRTIO_NET_F_HOST_ECN);

    return features;
}

static void virtio_net_apply_guest_offloads(VirtIONet *n)
{
    qemu_set_offload(qemu_get_queue(n->nic)->peer,
                     !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_CSUM)),
                     !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO4)),
                     !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO6)),
                     !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_ECN)),
                     !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_UFO)));
}

static uint64_t virtio_net_guest_offloads_by_features(uint32_t features)
{
    static const uint64_t guest_offloads_mask =
        (1ULL << VIRTIO_NET_F_GUEST_CSUM) |
        (1ULL << VIRTIO_NET_F_GUEST_TSO4) |
        (1ULL << VIRTIO_NET_F_GUEST_TSO6) |
        (1ULL << VIRTIO_NET_F_GUEST_ECN) |
        (1ULL << VIRTIO_NET_F_GUEST_UFO);

    return guest_offloads_mask & features;
}

static inline uint64_t virtio_net_supported_guest_offloads(VirtIONet *n)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    return virtio_net_guest_offloads_by_features(vdev->guest_features);
}

static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    int i;

    if (n->mtu_bypass_backend &&
            !virtio_has_feature(vdev->backend_features, VIRTIO_NET_F_MTU)) {
        features &= ~(1ULL << VIRTIO_NET_F_MTU);
    }

    virtio_net_set_multiqueue(n,
                              virtio_has_feature(features, VIRTIO_NET_F_MQ));

    virtio_net_set_mrg_rx_bufs(n,
                               virtio_has_feature(features,
                                                  VIRTIO_NET_F_MRG_RXBUF),
                               virtio_has_feature(features,
                                                  VIRTIO_F_VERSION_1));

    n->rsc4_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
        virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO4);
    n->rsc6_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
        virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO6);

    if (n->has_vnet_hdr) {
        n->curr_guest_offloads =
            virtio_net_guest_offloads_by_features(features);
        virtio_net_apply_guest_offloads(n);
    }

    for (i = 0; i < n->max_queues; i++) {
        NetClientState *nc = qemu_get_subqueue(n->nic, i);

        if (!get_vhost_net(nc->peer)) {
            continue;
        }
        vhost_net_ack_features(get_vhost_net(nc->peer), features);
    }

    if (virtio_has_feature(features, VIRTIO_NET_F_CTRL_VLAN)) {
        memset(n->vlans, 0, MAX_VLAN >> 3);
    } else {
        memset(n->vlans, 0xff, MAX_VLAN >> 3);
    }
}

static int virtio_net_handle_rx_mode(VirtIONet *n, uint8_t cmd,
                                     struct iovec *iov, unsigned int iov_cnt)
{
    uint8_t on;
    size_t s;
    NetClientState *nc = qemu_get_queue(n->nic);

    s = iov_to_buf(iov, iov_cnt, 0, &on, sizeof(on));
    if (s != sizeof(on)) {
        return VIRTIO_NET_ERR;
    }

    if (cmd == VIRTIO_NET_CTRL_RX_PROMISC) {
        n->promisc = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_ALLMULTI) {
        n->allmulti = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_ALLUNI) {
        n->alluni = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_NOMULTI) {
        n->nomulti = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_NOUNI) {
        n->nouni = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_NOBCAST) {
        n->nobcast = on;
    } else {
        return VIRTIO_NET_ERR;
    }

    rxfilter_notify(nc);

    return VIRTIO_NET_OK;
}

static int virtio_net_handle_offloads(VirtIONet *n, uint8_t cmd,
                                      struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint64_t offloads;
    size_t s;

    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
        return VIRTIO_NET_ERR;
    }

    s = iov_to_buf(iov, iov_cnt, 0, &offloads, sizeof(offloads));
    if (s != sizeof(offloads)) {
        return VIRTIO_NET_ERR;
    }

    if (cmd == VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET) {
        uint64_t supported_offloads;

        offloads = virtio_ldq_p(vdev, &offloads);

        if (!n->has_vnet_hdr) {
            return VIRTIO_NET_ERR;
        }

        n->rsc4_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
            virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO4);
        n->rsc6_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
            virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO6);
        virtio_clear_feature(&offloads, VIRTIO_NET_F_RSC_EXT);

        supported_offloads = virtio_net_supported_guest_offloads(n);
        if (offloads & ~supported_offloads) {
            return VIRTIO_NET_ERR;
        }

        n->curr_guest_offloads = offloads;
        virtio_net_apply_guest_offloads(n);

        return VIRTIO_NET_OK;
    } else {
        return VIRTIO_NET_ERR;
    }
}

static int virtio_net_handle_mac(VirtIONet *n, uint8_t cmd,
                                 struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    struct virtio_net_ctrl_mac mac_data;
    size_t s;
    NetClientState *nc = qemu_get_queue(n->nic);

    if (cmd == VIRTIO_NET_CTRL_MAC_ADDR_SET) {
        if (iov_size(iov, iov_cnt) != sizeof(n->mac)) {
            return VIRTIO_NET_ERR;
        }
        s = iov_to_buf(iov, iov_cnt, 0, &n->mac, sizeof(n->mac));
        assert(s == sizeof(n->mac));
        qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
        rxfilter_notify(nc);

        return VIRTIO_NET_OK;
    }

    if (cmd != VIRTIO_NET_CTRL_MAC_TABLE_SET) {
        return VIRTIO_NET_ERR;
    }

    int in_use = 0;
    int first_multi = 0;
    uint8_t uni_overflow = 0;
    uint8_t multi_overflow = 0;
    uint8_t *macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);

    s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
                   sizeof(mac_data.entries));
    mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
    if (s != sizeof(mac_data.entries)) {
        goto error;
    }
    iov_discard_front(&iov, &iov_cnt, s);

    if (mac_data.entries * ETH_ALEN > iov_size(iov, iov_cnt)) {
        goto error;
    }

    if (mac_data.entries <= MAC_TABLE_ENTRIES) {
        s = iov_to_buf(iov, iov_cnt, 0, macs,
                       mac_data.entries * ETH_ALEN);
        if (s != mac_data.entries * ETH_ALEN) {
            goto error;
        }
        in_use += mac_data.entries;
    } else {
        uni_overflow = 1;
    }

    iov_discard_front(&iov, &iov_cnt, mac_data.entries * ETH_ALEN);

    first_multi = in_use;

    s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
                   sizeof(mac_data.entries));
    mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
    if (s != sizeof(mac_data.entries)) {
        goto error;
    }

    iov_discard_front(&iov, &iov_cnt, s);

    if (mac_data.entries * ETH_ALEN != iov_size(iov, iov_cnt)) {
        goto error;
    }

    if (mac_data.entries <= MAC_TABLE_ENTRIES - in_use) {
        s = iov_to_buf(iov, iov_cnt, 0, &macs[in_use * ETH_ALEN],
                       mac_data.entries * ETH_ALEN);
        if (s != mac_data.entries * ETH_ALEN) {
            goto error;
        }
        in_use += mac_data.entries;
    } else {
        multi_overflow = 1;
    }

    n->mac_table.in_use = in_use;
    n->mac_table.first_multi = first_multi;
    n->mac_table.uni_overflow = uni_overflow;
    n->mac_table.multi_overflow = multi_overflow;
    memcpy(n->mac_table.macs, macs, MAC_TABLE_ENTRIES * ETH_ALEN);
    g_free(macs);
    rxfilter_notify(nc);

    return VIRTIO_NET_OK;

error:
    g_free(macs);
    return VIRTIO_NET_ERR;
}

static int virtio_net_handle_vlan_table(VirtIONet *n, uint8_t cmd,
                                        struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint16_t vid;
    size_t s;
    NetClientState *nc = qemu_get_queue(n->nic);

    s = iov_to_buf(iov, iov_cnt, 0, &vid, sizeof(vid));
    vid = virtio_lduw_p(vdev, &vid);
    if (s != sizeof(vid)) {
        return VIRTIO_NET_ERR;
    }

    if (vid >= MAX_VLAN)
        return VIRTIO_NET_ERR;

    if (cmd == VIRTIO_NET_CTRL_VLAN_ADD)
        n->vlans[vid >> 5] |= (1U << (vid & 0x1f));
    else if (cmd == VIRTIO_NET_CTRL_VLAN_DEL)
        n->vlans[vid >> 5] &= ~(1U << (vid & 0x1f));
    else
        return VIRTIO_NET_ERR;

    rxfilter_notify(nc);

    return VIRTIO_NET_OK;
}

static int virtio_net_handle_announce(VirtIONet *n, uint8_t cmd,
                                      struct iovec *iov, unsigned int iov_cnt)
{
    if (cmd == VIRTIO_NET_CTRL_ANNOUNCE_ACK &&
        n->status & VIRTIO_NET_S_ANNOUNCE) {
        n->status &= ~VIRTIO_NET_S_ANNOUNCE;
        if (n->announce_counter) {
            timer_mod(n->announce_timer,
                      qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) +
                      self_announce_delay(n->announce_counter));
        }
        return VIRTIO_NET_OK;
    } else {
        return VIRTIO_NET_ERR;
    }
}

static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd,
                                struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    struct virtio_net_ctrl_mq mq;
    size_t s;
    uint16_t queues;

    s = iov_to_buf(iov, iov_cnt, 0, &mq, sizeof(mq));
    if (s != sizeof(mq)) {
        return VIRTIO_NET_ERR;
    }

    if (cmd != VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET) {
        return VIRTIO_NET_ERR;
    }

    queues = virtio_lduw_p(vdev, &mq.virtqueue_pairs);

    if (queues < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
        queues > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX ||
        queues > n->max_queues ||
        !n->multiqueue) {
        return VIRTIO_NET_ERR;
    }

    n->curr_queues = queues;
    /* stop the backend before changing the number of queues to avoid handling a
     * disabled queue */
    virtio_net_set_status(vdev, vdev->status);
    virtio_net_set_queues(n);

    return VIRTIO_NET_OK;
}

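/*
 * Control virtqueue handler: each element carries a virtio_net_ctrl_hdr in
 * its out buffers and expects a one-byte ack status written to its in
 * buffer; commands are dispatched on ctrl.class to the handlers above.
 */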
static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    struct virtio_net_ctrl_hdr ctrl;
    virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
    VirtQueueElement *elem;
    size_t s;
    struct iovec *iov, *iov2;
    unsigned int iov_cnt;

    for (;;) {
        elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
        if (!elem) {
            break;
        }
        if (iov_size(elem->in_sg, elem->in_num) < sizeof(status) ||
            iov_size(elem->out_sg, elem->out_num) < sizeof(ctrl)) {
            virtio_error(vdev, "virtio-net ctrl missing headers");
            virtqueue_detach_element(vq, elem, 0);
            g_free(elem);
            break;
        }

        iov_cnt = elem->out_num;
        iov2 = iov = g_memdup(elem->out_sg, sizeof(struct iovec) * elem->out_num);
        s = iov_to_buf(iov, iov_cnt, 0, &ctrl, sizeof(ctrl));
        iov_discard_front(&iov, &iov_cnt, sizeof(ctrl));
        if (s != sizeof(ctrl)) {
            status = VIRTIO_NET_ERR;
        } else if (ctrl.class == VIRTIO_NET_CTRL_RX) {
            status = virtio_net_handle_rx_mode(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_MAC) {
            status = virtio_net_handle_mac(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_VLAN) {
            status = virtio_net_handle_vlan_table(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_ANNOUNCE) {
            status = virtio_net_handle_announce(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_MQ) {
            status = virtio_net_handle_mq(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_GUEST_OFFLOADS) {
            status = virtio_net_handle_offloads(n, ctrl.cmd, iov, iov_cnt);
        }

        s = iov_from_buf(elem->in_sg, elem->in_num, 0, &status, sizeof(status));
        assert(s == sizeof(status));

        virtqueue_push(vq, elem, sizeof(status));
        virtio_notify(vdev, vq);
        g_free(iov2);
        g_free(elem);
    }
}

/* RX */

static void virtio_net_handle_rx(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    int queue_index = vq2q(virtio_get_queue_index(vq));

    qemu_flush_queued_packets(qemu_get_subqueue(n->nic, queue_index));
}

static int virtio_net_can_receive(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    VirtIONetQueue *q = virtio_net_get_subqueue(nc);

    if (!vdev->vm_running) {
        return 0;
    }

    if (nc->queue_index >= n->curr_queues) {
        return 0;
    }

    if (!virtio_queue_ready(q->rx_vq) ||
        !(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
        return 0;
    }

    return 1;
}

static int virtio_net_has_buffers(VirtIONetQueue *q, int bufsize)
{
    VirtIONet *n = q->n;
    if (virtio_queue_empty(q->rx_vq) ||
        (n->mergeable_rx_bufs &&
         !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
        virtio_queue_set_notification(q->rx_vq, 1);

        /* To avoid a race condition where the guest has made some buffers
         * available after the above check but before notification was
         * enabled, check for available buffers again.
         */
        if (virtio_queue_empty(q->rx_vq) ||
            (n->mergeable_rx_bufs &&
             !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
            return 0;
        }
    }

    virtio_queue_set_notification(q->rx_vq, 0);
    return 1;
}

static void virtio_net_hdr_swap(VirtIODevice *vdev, struct virtio_net_hdr *hdr)
{
    virtio_tswap16s(vdev, &hdr->hdr_len);
    virtio_tswap16s(vdev, &hdr->gso_size);
    virtio_tswap16s(vdev, &hdr->csum_start);
    virtio_tswap16s(vdev, &hdr->csum_offset);
}

/* dhclient uses AF_PACKET but doesn't pass auxdata to the kernel so
 * it never finds out that the packets don't have valid checksums.  This
 * causes dhclient to get upset.  Fedora's carried a patch for ages to
 * fix this with Xen but it hasn't appeared in an upstream release of
 * dhclient yet.
 *
 * To avoid breaking existing guests, we catch udp packets and add
 * checksums.  This is terrible but it's better than hacking the guest
 * kernels.
 *
 * N.B. if we introduce a zero-copy API, this operation is no longer free so
 * we should provide a mechanism to disable it to avoid polluting the host
 * cache.
 */
static void work_around_broken_dhclient(struct virtio_net_hdr *hdr,
                                        uint8_t *buf, size_t size)
{
    if ((hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && /* missing csum */
        (size > 27 && size < 1500) && /* normal sized MTU */
        (buf[12] == 0x08 && buf[13] == 0x00) && /* ethertype == IPv4 */
        (buf[23] == 17) && /* ip.protocol == UDP */
        (buf[34] == 0 && buf[35] == 67)) { /* udp.srcport == bootps */
        net_checksum_calculate(buf, size);
        hdr->flags &= ~VIRTIO_NET_HDR_F_NEEDS_CSUM;
    }
}

static void receive_header(VirtIONet *n, const struct iovec *iov, int iov_cnt,
                           const void *buf, size_t size)
{
    if (n->has_vnet_hdr) {
        /* FIXME this cast is evil */
        void *wbuf = (void *)buf;
        work_around_broken_dhclient(wbuf, wbuf + n->host_hdr_len,
                                    size - n->host_hdr_len);

        if (n->needs_vnet_hdr_swap) {
            virtio_net_hdr_swap(VIRTIO_DEVICE(n), wbuf);
        }
        iov_from_buf(iov, iov_cnt, 0, buf, sizeof(struct virtio_net_hdr));
    } else {
        struct virtio_net_hdr hdr = {
            .flags = 0,
            .gso_type = VIRTIO_NET_HDR_GSO_NONE
        };
        iov_from_buf(iov, iov_cnt, 0, &hdr, sizeof hdr);
    }
}

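/*
 * Decide whether a packet is delivered to the guest: returns 1 to accept,
 * 0 to drop.  Promiscuous mode accepts everything; otherwise the VLAN
 * filter is consulted first, then the broadcast/multicast/unicast policies
 * and the MAC table programmed via the control virtqueue.
 */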
static int receive_filter(VirtIONet *n, const uint8_t *buf, int size)
{
    static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
    static const uint8_t vlan[] = {0x81, 0x00};
    uint8_t *ptr = (uint8_t *)buf;
    int i;

    if (n->promisc)
        return 1;

    ptr += n->host_hdr_len;

    if (!memcmp(&ptr[12], vlan, sizeof(vlan))) {
        int vid = lduw_be_p(ptr + 14) & 0xfff;
        if (!(n->vlans[vid >> 5] & (1U << (vid & 0x1f))))
            return 0;
    }

    if (ptr[0] & 1) { // multicast
        if (!memcmp(ptr, bcast, sizeof(bcast))) {
            return !n->nobcast;
        } else if (n->nomulti) {
            return 0;
        } else if (n->allmulti || n->mac_table.multi_overflow) {
            return 1;
        }

        for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
            if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
                return 1;
            }
        }
    } else { // unicast
        if (n->nouni) {
            return 0;
        } else if (n->alluni || n->mac_table.uni_overflow) {
            return 1;
        } else if (!memcmp(ptr, n->mac, ETH_ALEN)) {
            return 1;
        }

        for (i = 0; i < n->mac_table.first_multi; i++) {
            if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
                return 1;
            }
        }
    }

    return 0;
}

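/*
 * Receive path: the packet is copied into descriptors popped from the RX
 * virtqueue, the virtio-net header first; with mergeable RX buffers the
 * number of buffers consumed is patched into num_buffers once the whole
 * packet has been placed.
 */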
static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf,
                                      size_t size)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIONetQueue *q = virtio_net_get_subqueue(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    struct iovec mhdr_sg[VIRTQUEUE_MAX_SIZE];
    struct virtio_net_hdr_mrg_rxbuf mhdr;
    unsigned mhdr_cnt = 0;
    size_t offset, i, guest_offset;

    if (!virtio_net_can_receive(nc)) {
        return -1;
    }

    /* hdr_len refers to the header we supply to the guest */
    if (!virtio_net_has_buffers(q, size + n->guest_hdr_len - n->host_hdr_len)) {
        return 0;
    }

    if (!receive_filter(n, buf, size))
        return size;

    offset = i = 0;

    while (offset < size) {
        VirtQueueElement *elem;
        int len, total;
        const struct iovec *sg;

        total = 0;

        elem = virtqueue_pop(q->rx_vq, sizeof(VirtQueueElement));
        if (!elem) {
            if (i) {
                virtio_error(vdev, "virtio-net unexpected empty queue: "
                             "i %zd mergeable %d offset %zd, size %zd, "
                             "guest hdr len %zd, host hdr len %zd "
                             "guest features 0x%" PRIx64,
                             i, n->mergeable_rx_bufs, offset, size,
                             n->guest_hdr_len, n->host_hdr_len,
                             vdev->guest_features);
            }
            return -1;
        }

        if (elem->in_num < 1) {
            virtio_error(vdev,
                         "virtio-net receive queue contains no in buffers");
            virtqueue_detach_element(q->rx_vq, elem, 0);
            g_free(elem);
            return -1;
        }

        sg = elem->in_sg;
        if (i == 0) {
            assert(offset == 0);
            if (n->mergeable_rx_bufs) {
                mhdr_cnt = iov_copy(mhdr_sg, ARRAY_SIZE(mhdr_sg),
                                    sg, elem->in_num,
                                    offsetof(typeof(mhdr), num_buffers),
                                    sizeof(mhdr.num_buffers));
            }

            receive_header(n, sg, elem->in_num, buf, size);
            offset = n->host_hdr_len;
            total += n->guest_hdr_len;
            guest_offset = n->guest_hdr_len;
        } else {
            guest_offset = 0;
        }

        /* copy in packet.  ugh */
        len = iov_from_buf(sg, elem->in_num, guest_offset,
                           buf + offset, size - offset);
        total += len;
        offset += len;
        /* If buffers can't be merged, at this point we
         * must have consumed the complete packet.
         * Otherwise, drop it. */
        if (!n->mergeable_rx_bufs && offset < size) {
            virtqueue_unpop(q->rx_vq, elem, total);
            g_free(elem);
            return size;
        }

        /* signal other side */
        virtqueue_fill(q->rx_vq, elem, total, i++);
        g_free(elem);
    }

    if (mhdr_cnt) {
        virtio_stw_p(vdev, &mhdr.num_buffers, i);
        iov_from_buf(mhdr_sg, mhdr_cnt,
                     0,
                     &mhdr.num_buffers, sizeof mhdr.num_buffers);
    }

    virtqueue_flush(q->rx_vq, i);
    virtio_notify(vdev, q->rx_vq);

    return size;
}

static ssize_t virtio_net_do_receive(NetClientState *nc, const uint8_t *buf,
                                     size_t size)
{
    ssize_t r;

    rcu_read_lock();
    r = virtio_net_receive_rcu(nc, buf, size);
    rcu_read_unlock();
    return r;
}

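/*
 * Receive Segment Coalescing (RSC): TCP segments that belong to the same
 * flow are cached per chain and merged into a larger segment before being
 * handed to the guest; a per-chain timer drains whatever could not be
 * coalesced in time.
 */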
static void virtio_net_rsc_extract_unit4(VirtioNetRscChain *chain,
                                         const uint8_t *buf,
                                         VirtioNetRscUnit *unit)
{
    uint16_t ip_hdrlen;
    struct ip_header *ip;

    ip = (struct ip_header *)(buf + chain->n->guest_hdr_len
                              + sizeof(struct eth_header));
    unit->ip = (void *)ip;
    ip_hdrlen = (ip->ip_ver_len & 0xF) << 2;
    unit->ip_plen = &ip->ip_len;
    unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip) + ip_hdrlen);
    unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10;
    unit->payload = htons(*unit->ip_plen) - ip_hdrlen - unit->tcp_hdrlen;
}

static void virtio_net_rsc_extract_unit6(VirtioNetRscChain *chain,
                                         const uint8_t *buf,
                                         VirtioNetRscUnit *unit)
{
    struct ip6_header *ip6;

    ip6 = (struct ip6_header *)(buf + chain->n->guest_hdr_len
                                + sizeof(struct eth_header));
    unit->ip = ip6;
    unit->ip_plen = &(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
    unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip)
                                      + sizeof(struct ip6_header));
    unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10;

    /* There is a difference between the payload length in ipv4 and v6:
       the ip header is excluded in ipv6 */
    unit->payload = htons(*unit->ip_plen) - unit->tcp_hdrlen;
}

static size_t virtio_net_rsc_drain_seg(VirtioNetRscChain *chain,
                                       VirtioNetRscSeg *seg)
{
    int ret;
    struct virtio_net_hdr *h;

    h = (struct virtio_net_hdr *)seg->buf;
    h->flags = 0;
    h->gso_type = VIRTIO_NET_HDR_GSO_NONE;

    if (seg->is_coalesced) {
        *virtio_net_rsc_ext_num_packets(h) = seg->packets;
        *virtio_net_rsc_ext_num_dupacks(h) = seg->dup_ack;
        h->flags = VIRTIO_NET_HDR_F_RSC_INFO;
        if (chain->proto == ETH_P_IP) {
            h->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
        } else {
            h->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
        }
    }

    ret = virtio_net_do_receive(seg->nc, seg->buf, seg->size);
    QTAILQ_REMOVE(&chain->buffers, seg, next);
    g_free(seg->buf);
    g_free(seg);

    return ret;
}

static void virtio_net_rsc_purge(void *opq)
{
    VirtioNetRscSeg *seg, *rn;
    VirtioNetRscChain *chain = (VirtioNetRscChain *)opq;

    QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn) {
        if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
            chain->stat.purge_failed++;
            continue;
        }
    }

    chain->stat.timer++;
    if (!QTAILQ_EMPTY(&chain->buffers)) {
        timer_mod(chain->drain_timer,
                  qemu_clock_get_ns(QEMU_CLOCK_HOST) + chain->n->rsc_timeout);
    }
}

static void virtio_net_rsc_cleanup(VirtIONet *n)
{
    VirtioNetRscChain *chain, *rn_chain;
    VirtioNetRscSeg *seg, *rn_seg;

    QTAILQ_FOREACH_SAFE(chain, &n->rsc_chains, next, rn_chain) {
        QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn_seg) {
            QTAILQ_REMOVE(&chain->buffers, seg, next);
            g_free(seg->buf);
            g_free(seg);
        }

        timer_del(chain->drain_timer);
        timer_free(chain->drain_timer);
        QTAILQ_REMOVE(&n->rsc_chains, chain, next);
        g_free(chain);
    }
}

static void virtio_net_rsc_cache_buf(VirtioNetRscChain *chain,
                                     NetClientState *nc,
                                     const uint8_t *buf, size_t size)
{
    uint16_t hdr_len;
    VirtioNetRscSeg *seg;

    hdr_len = chain->n->guest_hdr_len;
    seg = g_malloc(sizeof(VirtioNetRscSeg));
    seg->buf = g_malloc(hdr_len + sizeof(struct eth_header)
        + sizeof(struct ip6_header) + VIRTIO_NET_MAX_TCP_PAYLOAD);
    memcpy(seg->buf, buf, size);
    seg->size = size;
    seg->packets = 1;
    seg->dup_ack = 0;
    seg->is_coalesced = 0;
    seg->nc = nc;

    QTAILQ_INSERT_TAIL(&chain->buffers, seg, next);
    chain->stat.cache++;

    switch (chain->proto) {
    case ETH_P_IP:
        virtio_net_rsc_extract_unit4(chain, seg->buf, &seg->unit);
        break;
    case ETH_P_IPV6:
        virtio_net_rsc_extract_unit6(chain, seg->buf, &seg->unit);
        break;
    default:
        g_assert_not_reached();
    }
}

static int32_t virtio_net_rsc_handle_ack(VirtioNetRscChain *chain,
                                         VirtioNetRscSeg *seg,
                                         const uint8_t *buf,
                                         struct tcp_header *n_tcp,
                                         struct tcp_header *o_tcp)
{
    uint32_t nack, oack;
    uint16_t nwin, owin;

    nack = htonl(n_tcp->th_ack);
    nwin = htons(n_tcp->th_win);
    oack = htonl(o_tcp->th_ack);
    owin = htons(o_tcp->th_win);

    if ((nack - oack) >= VIRTIO_NET_MAX_TCP_PAYLOAD) {
        chain->stat.ack_out_of_win++;
        return RSC_FINAL;
    } else if (nack == oack) {
        /* duplicated ack or window probe */
        if (nwin == owin) {
            /* duplicated ack, add dup ack count due to whql test up to 1 */
            chain->stat.dup_ack++;
            return RSC_FINAL;
        } else {
            /* Coalesce window update */
            o_tcp->th_win = n_tcp->th_win;
            chain->stat.win_update++;
            return RSC_COALESCE;
        }
    } else {
        /* pure ack, go to 'C', finalize */
        chain->stat.pure_ack++;
        return RSC_FINAL;
    }
}

static int32_t virtio_net_rsc_coalesce_data(VirtioNetRscChain *chain,
                                            VirtioNetRscSeg *seg,
                                            const uint8_t *buf,
                                            VirtioNetRscUnit *n_unit)
{
    void *data;
    uint16_t o_ip_len;
    uint32_t nseq, oseq;
    VirtioNetRscUnit *o_unit;

    o_unit = &seg->unit;
    o_ip_len = htons(*o_unit->ip_plen);
    nseq = htonl(n_unit->tcp->th_seq);
    oseq = htonl(o_unit->tcp->th_seq);

    /* out of order or retransmitted. */
    if ((nseq - oseq) > VIRTIO_NET_MAX_TCP_PAYLOAD) {
        chain->stat.data_out_of_win++;
        return RSC_FINAL;
    }

    data = ((uint8_t *)n_unit->tcp) + n_unit->tcp_hdrlen;
    if (nseq == oseq) {
        if ((o_unit->payload == 0) && n_unit->payload) {
            /* From no payload to payload, normal case, not a dup ack or etc */
            chain->stat.data_after_pure_ack++;
            goto coalesce;
        } else {
            return virtio_net_rsc_handle_ack(chain, seg, buf,
                                             n_unit->tcp, o_unit->tcp);
        }
    } else if ((nseq - oseq) != o_unit->payload) {
        /* Not a consistent packet, out of order */
        chain->stat.data_out_of_order++;
        return RSC_FINAL;
    } else {
coalesce:
        if ((o_ip_len + n_unit->payload) > chain->max_payload) {
            chain->stat.over_size++;
            return RSC_FINAL;
        }

        /* Here comes the right data: the payload length differs between
           v4 and v6, so use the field value to update and record the new
           data length */
        o_unit->payload += n_unit->payload; /* update new data len */

        /* update field in ip header */
        *o_unit->ip_plen = htons(o_ip_len + n_unit->payload);

        /* Bring 'PUSH' big: the whql test guide says 'PUSH' can be coalesced
           for windows guest, while this may change the behavior for linux
           guest (only if it uses RSC feature). */
        o_unit->tcp->th_offset_flags = n_unit->tcp->th_offset_flags;

        o_unit->tcp->th_ack = n_unit->tcp->th_ack;
        o_unit->tcp->th_win = n_unit->tcp->th_win;

        memmove(seg->buf + seg->size, data, n_unit->payload);
        seg->size += n_unit->payload;
        seg->packets++;
        chain->stat.coalesced++;
        return RSC_COALESCE;
    }
}

static int32_t virtio_net_rsc_coalesce4(VirtioNetRscChain *chain,
                                        VirtioNetRscSeg *seg,
                                        const uint8_t *buf, size_t size,
                                        VirtioNetRscUnit *unit)
{
    struct ip_header *ip1, *ip2;

    ip1 = (struct ip_header *)(unit->ip);
    ip2 = (struct ip_header *)(seg->unit.ip);
    if ((ip1->ip_src ^ ip2->ip_src) || (ip1->ip_dst ^ ip2->ip_dst)
        || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
        || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
        chain->stat.no_match++;
        return RSC_NO_MATCH;
    }

    return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
}

static int32_t virtio_net_rsc_coalesce6(VirtioNetRscChain *chain,
                                        VirtioNetRscSeg *seg,
                                        const uint8_t *buf, size_t size,
                                        VirtioNetRscUnit *unit)
{
    struct ip6_header *ip1, *ip2;

    ip1 = (struct ip6_header *)(unit->ip);
    ip2 = (struct ip6_header *)(seg->unit.ip);
    if (memcmp(&ip1->ip6_src, &ip2->ip6_src, sizeof(struct in6_address))
        || memcmp(&ip1->ip6_dst, &ip2->ip6_dst, sizeof(struct in6_address))
        || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
        || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
        chain->stat.no_match++;
        return RSC_NO_MATCH;
    }

    return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
}

/* Packets with 'SYN' should bypass; packets with other control flags should
 * only be sent after the chain is drained, to prevent reordering */
static int virtio_net_rsc_tcp_ctrl_check(VirtioNetRscChain *chain,
                                         struct tcp_header *tcp)
{
    uint16_t tcp_hdr;
    uint16_t tcp_flag;

    tcp_flag = htons(tcp->th_offset_flags);
    tcp_hdr = (tcp_flag & VIRTIO_NET_TCP_HDR_LENGTH) >> 10;
    tcp_flag &= VIRTIO_NET_TCP_FLAG;
    tcp_flag = htons(tcp->th_offset_flags) & 0x3F;
    if (tcp_flag & TH_SYN) {
        chain->stat.tcp_syn++;
        return RSC_BYPASS;
    }

    if (tcp_flag & (TH_FIN | TH_URG | TH_RST | TH_ECE | TH_CWR)) {
        chain->stat.tcp_ctrl_drain++;
        return RSC_FINAL;
    }

    if (tcp_hdr > sizeof(struct tcp_header)) {
        chain->stat.tcp_all_opt++;
        return RSC_FINAL;
    }

    return RSC_CANDIDATE;
}

static size_t virtio_net_rsc_do_coalesce(VirtioNetRscChain *chain,
                                         NetClientState *nc,
                                         const uint8_t *buf, size_t size,
                                         VirtioNetRscUnit *unit)
{
    int ret;
    VirtioNetRscSeg *seg, *nseg;

    if (QTAILQ_EMPTY(&chain->buffers)) {
        chain->stat.empty_cache++;
        virtio_net_rsc_cache_buf(chain, nc, buf, size);
        timer_mod(chain->drain_timer,
                  qemu_clock_get_ns(QEMU_CLOCK_HOST) + chain->n->rsc_timeout);
        return size;
    }

    QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
        if (chain->proto == ETH_P_IP) {
            ret = virtio_net_rsc_coalesce4(chain, seg, buf, size, unit);
        } else {
            ret = virtio_net_rsc_coalesce6(chain, seg, buf, size, unit);
        }

        if (ret == RSC_FINAL) {
            if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
                /* Send failed */
                chain->stat.final_failed++;
                return 0;
            }

            /* Send current packet */
            return virtio_net_do_receive(nc, buf, size);
        } else if (ret == RSC_NO_MATCH) {
            continue;
        } else {
            /* Coalesced, mark coalesced flag to tell calc cksum for ipv4 */
            seg->is_coalesced = 1;
            return size;
        }
    }

    chain->stat.no_match_cache++;
    virtio_net_rsc_cache_buf(chain, nc, buf, size);
    return size;
}

1711/* Drain a connection data, this is to avoid out of order segments */
1712static size_t virtio_net_rsc_drain_flow(VirtioNetRscChain *chain,
1713 NetClientState *nc,
1714 const uint8_t *buf, size_t size,
1715 uint16_t ip_start, uint16_t ip_size,
1716 uint16_t tcp_port)
1717{
1718 VirtioNetRscSeg *seg, *nseg;
1719 uint32_t ppair1, ppair2;
1720
1721 ppair1 = *(uint32_t *)(buf + tcp_port);
1722 QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
1723 ppair2 = *(uint32_t *)(seg->buf + tcp_port);
1724 if (memcmp(buf + ip_start, seg->buf + ip_start, ip_size)
1725 || (ppair1 != ppair2)) {
1726 continue;
1727 }
1728 if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
1729 chain->stat.drain_failed++;
1730 }
1731
1732 break;
1733 }
1734
1735 return virtio_net_do_receive(nc, buf, size);
1736}
1737
1738static int32_t virtio_net_rsc_sanity_check4(VirtioNetRscChain *chain,
1739 struct ip_header *ip,
1740 const uint8_t *buf, size_t size)
1741{
1742 uint16_t ip_len;
1743
1744 /* Not an ipv4 packet */
1745 if (((ip->ip_ver_len & 0xF0) >> 4) != IP_HEADER_VERSION_4) {
1746 chain->stat.ip_option++;
1747 return RSC_BYPASS;
1748 }
1749
1750 /* Don't handle packets with ip option */
1751 if ((ip->ip_ver_len & 0xF) != VIRTIO_NET_IP4_HEADER_LENGTH) {
1752 chain->stat.ip_option++;
1753 return RSC_BYPASS;
1754 }
1755
1756 if (ip->ip_p != IPPROTO_TCP) {
1757 chain->stat.bypass_not_tcp++;
1758 return RSC_BYPASS;
1759 }
1760
1761 /* Don't handle packets with ip fragment */
1762 if (!(htons(ip->ip_off) & IP_DF)) {
1763 chain->stat.ip_frag++;
1764 return RSC_BYPASS;
1765 }
1766
1767 /* Don't handle packets with ecn flag */
1768 if (IPTOS_ECN(ip->ip_tos)) {
1769 chain->stat.ip_ecn++;
1770 return RSC_BYPASS;
1771 }
1772
1773 ip_len = htons(ip->ip_len);
1774 if (ip_len < (sizeof(struct ip_header) + sizeof(struct tcp_header))
1775 || ip_len > (size - chain->n->guest_hdr_len -
1776 sizeof(struct eth_header))) {
1777 chain->stat.ip_hacked++;
1778 return RSC_BYPASS;
1779 }
1780
1781 return RSC_CANDIDATE;
1782}
1783
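/* RSC entry point for IPv4 frames: forward short or non-candidate packets
 * unchanged, drain the flow on TCP control segments, and try to coalesce
 * everything else. */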
1784static size_t virtio_net_rsc_receive4(VirtioNetRscChain *chain,
1785 NetClientState *nc,
1786 const uint8_t *buf, size_t size)
1787{
1788 int32_t ret;
1789 uint16_t hdr_len;
1790 VirtioNetRscUnit unit;
1791
1792 hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;
1793
1794 if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header)
1795 + sizeof(struct tcp_header))) {
1796 chain->stat.bypass_not_tcp++;
1797 return virtio_net_do_receive(nc, buf, size);
1798 }
1799
1800 virtio_net_rsc_extract_unit4(chain, buf, &unit);
1801 if (virtio_net_rsc_sanity_check4(chain, unit.ip, buf, size)
1802 != RSC_CANDIDATE) {
1803 return virtio_net_do_receive(nc, buf, size);
1804 }
1805
1806 ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp);
1807 if (ret == RSC_BYPASS) {
1808 return virtio_net_do_receive(nc, buf, size);
1809 } else if (ret == RSC_FINAL) {
1810 return virtio_net_rsc_drain_flow(chain, nc, buf, size,
1811 ((hdr_len + sizeof(struct eth_header)) + 12),
1812 VIRTIO_NET_IP4_ADDR_SIZE,
1813 hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header));
1814 }
1815
1816 return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
1817}
1818
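/* IPv6 counterpart of the check above: only plain TCP with no extension
 * headers, no ECN marking and a plausible payload length qualifies. */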
1819static int32_t virtio_net_rsc_sanity_check6(VirtioNetRscChain *chain,
1820 struct ip6_header *ip6,
1821 const uint8_t *buf, size_t size)
1822{
1823 uint16_t ip_len;
1824
1825 if (((ip6->ip6_ctlun.ip6_un1.ip6_un1_flow & 0xF0) >> 4)
1826 != IP_HEADER_VERSION_6) {
1827 return RSC_BYPASS;
1828 }
1829
1830 /* Both options and the protocol are checked by this test */
1831 if (ip6->ip6_ctlun.ip6_un1.ip6_un1_nxt != IPPROTO_TCP) {
1832 chain->stat.bypass_not_tcp++;
1833 return RSC_BYPASS;
1834 }
1835
1836 ip_len = htons(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
1837 if (ip_len < sizeof(struct tcp_header) ||
1838 ip_len > (size - chain->n->guest_hdr_len - sizeof(struct eth_header)
1839 - sizeof(struct ip6_header))) {
1840 chain->stat.ip_hacked++;
1841 return RSC_BYPASS;
1842 }
1843
1844 /* Don't handle packets with ecn flag */
1845 if (IP6_ECN(ip6->ip6_ctlun.ip6_un3.ip6_un3_ecn)) {
1846 chain->stat.ip_ecn++;
1847 return RSC_BYPASS;
1848 }
1849
1850 return RSC_CANDIDATE;
1851}
1852
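/* RSC entry point for IPv6 frames; mirrors virtio_net_rsc_receive4 but is
 * called with the chain as an opaque pointer and uses IPv6 offsets. */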
1853static size_t virtio_net_rsc_receive6(void *opq, NetClientState *nc,
1854 const uint8_t *buf, size_t size)
1855{
1856 int32_t ret;
1857 uint16_t hdr_len;
1858 VirtioNetRscChain *chain;
1859 VirtioNetRscUnit unit;
1860
1861 chain = (VirtioNetRscChain *)opq;
1862 hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;
1863
1864 if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip6_header)
1865 + sizeof(struct tcp_header))) {
1866 return virtio_net_do_receive(nc, buf, size);
1867 }
1868
1869 virtio_net_rsc_extract_unit6(chain, buf, &unit);
1870 if (RSC_CANDIDATE != virtio_net_rsc_sanity_check6(chain,
1871 unit.ip, buf, size)) {
1872 return virtio_net_do_receive(nc, buf, size);
1873 }
1874
1875 ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp);
1876 if (ret == RSC_BYPASS) {
1877 return virtio_net_do_receive(nc, buf, size);
1878 } else if (ret == RSC_FINAL) {
1879 return virtio_net_rsc_drain_flow(chain, nc, buf, size,
1880 ((hdr_len + sizeof(struct eth_header)) + 8),
1881 VIRTIO_NET_IP6_ADDR_SIZE,
1882 hdr_len + sizeof(struct eth_header)
1883 + sizeof(struct ip6_header));
1884 }
1885
1886 return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
1887}
1888
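/* Find the coalescing chain for 'proto', creating it on first use; only
 * ETH_P_IP and ETH_P_IPV6 are tracked. */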
1889static VirtioNetRscChain *virtio_net_rsc_lookup_chain(VirtIONet *n,
1890 NetClientState *nc,
1891 uint16_t proto)
1892{
1893 VirtioNetRscChain *chain;
1894
1895 if ((proto != (uint16_t)ETH_P_IP) && (proto != (uint16_t)ETH_P_IPV6)) {
1896 return NULL;
1897 }
1898
1899 QTAILQ_FOREACH(chain, &n->rsc_chains, next) {
1900 if (chain->proto == proto) {
1901 return chain;
1902 }
1903 }
1904
1905 chain = g_malloc(sizeof(*chain));
1906 chain->n = n;
1907 chain->proto = proto;
1908 if (proto == (uint16_t)ETH_P_IP) {
1909 chain->max_payload = VIRTIO_NET_MAX_IP4_PAYLOAD;
1910 chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
1911 } else {
1912 chain->max_payload = VIRTIO_NET_MAX_IP6_PAYLOAD;
1913 chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
1914 }
1915 chain->drain_timer = timer_new_ns(QEMU_CLOCK_HOST,
1916 virtio_net_rsc_purge, chain);
1917 memset(&chain->stat, 0, sizeof(chain->stat));
1918
1919 QTAILQ_INIT(&chain->buffers);
1920 QTAILQ_INSERT_TAIL(&n->rsc_chains, chain, next);
1921
1922 return chain;
1923}
1924
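/* Dispatch an incoming packet to the per-protocol RSC chain, falling back
 * to the normal receive path when it cannot be coalesced. */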
1925static ssize_t virtio_net_rsc_receive(NetClientState *nc,
1926 const uint8_t *buf,
1927 size_t size)
1928{
1929 uint16_t proto;
1930 VirtioNetRscChain *chain;
1931 struct eth_header *eth;
1932 VirtIONet *n;
1933
1934 n = qemu_get_nic_opaque(nc);
1935 if (size < (n->host_hdr_len + sizeof(struct eth_header))) {
1936 return virtio_net_do_receive(nc, buf, size);
1937 }
1938
1939 eth = (struct eth_header *)(buf + n->guest_hdr_len);
1940 proto = htons(eth->h_proto);
1941
1942 chain = virtio_net_rsc_lookup_chain(n, nc, proto);
1943 if (chain) {
1944 chain->stat.received++;
1945 if (proto == (uint16_t)ETH_P_IP && n->rsc4_enabled) {
1946 return virtio_net_rsc_receive4(chain, nc, buf, size);
1947 } else if (proto == (uint16_t)ETH_P_IPV6 && n->rsc6_enabled) {
1948 return virtio_net_rsc_receive6(chain, nc, buf, size);
1949 }
1950 }
1951 return virtio_net_do_receive(nc, buf, size);
1952}
1953
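/* Receive callback registered in net_virtio_info: take the RSC path only
 * when IPv4 or IPv6 receive coalescing is enabled. */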
1954static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf,
1955 size_t size)
1956{
1957 VirtIONet *n = qemu_get_nic_opaque(nc);
1958 if ((n->rsc4_enabled || n->rsc6_enabled)) {
1959 return virtio_net_rsc_receive(nc, buf, size);
1960 } else {
1961 return virtio_net_do_receive(nc, buf, size);
1962 }
1963}
1964
0c87e93e 1965static int32_t virtio_net_flush_tx(VirtIONetQueue *q);
6243375f 1966
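/* Completion callback for an asynchronous transmit: return the in-flight
 * element to the guest, re-enable notification and keep flushing. */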
4e68f7a0 1967static void virtio_net_tx_complete(NetClientState *nc, ssize_t len)
6243375f 1968{
cc1f0f45 1969 VirtIONet *n = qemu_get_nic_opaque(nc);
fed699f9 1970 VirtIONetQueue *q = virtio_net_get_subqueue(nc);
17a0ca55 1971 VirtIODevice *vdev = VIRTIO_DEVICE(n);
6243375f 1972
51b19ebe 1973 virtqueue_push(q->tx_vq, q->async_tx.elem, 0);
17a0ca55 1974 virtio_notify(vdev, q->tx_vq);
6243375f 1975
51b19ebe
PB
1976 g_free(q->async_tx.elem);
1977 q->async_tx.elem = NULL;
6243375f 1978
0c87e93e
JW
1979 virtio_queue_set_notification(q->tx_vq, 1);
1980 virtio_net_flush_tx(q);
6243375f
MM
1981}
1982
fbe78f4f 1983/* TX */
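/* Flush up to tx_burst packets from the TX virtqueue. Returns the number
 * of packets sent, -EBUSY if an asynchronous send is pending, or -EINVAL
 * if the device has been marked broken. */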
0c87e93e 1984static int32_t virtio_net_flush_tx(VirtIONetQueue *q)
fbe78f4f 1985{
0c87e93e 1986 VirtIONet *n = q->n;
17a0ca55 1987 VirtIODevice *vdev = VIRTIO_DEVICE(n);
51b19ebe 1988 VirtQueueElement *elem;
e3f30488 1989 int32_t num_packets = 0;
fed699f9 1990 int queue_index = vq2q(virtio_get_queue_index(q->tx_vq));
17a0ca55 1991 if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
e3f30488
AW
1992 return num_packets;
1993 }
fbe78f4f 1994
51b19ebe 1995 if (q->async_tx.elem) {
0c87e93e 1996 virtio_queue_set_notification(q->tx_vq, 0);
e3f30488 1997 return num_packets;
6243375f
MM
1998 }
1999
51b19ebe 2000 for (;;) {
bd89dd98 2001 ssize_t ret;
51b19ebe
PB
2002 unsigned int out_num;
2003 struct iovec sg[VIRTQUEUE_MAX_SIZE], sg2[VIRTQUEUE_MAX_SIZE + 1], *out_sg;
feb93f36 2004 struct virtio_net_hdr_mrg_rxbuf mhdr;
fbe78f4f 2005
51b19ebe
PB
2006 elem = virtqueue_pop(q->tx_vq, sizeof(VirtQueueElement));
2007 if (!elem) {
2008 break;
2009 }
2010
2011 out_num = elem->out_num;
2012 out_sg = elem->out_sg;
7b80d08e 2013 if (out_num < 1) {
fa5e56c2
GK
2014 virtio_error(vdev, "virtio-net header not in first element");
2015 virtqueue_detach_element(q->tx_vq, elem, 0);
2016 g_free(elem);
2017 return -EINVAL;
fbe78f4f
AL
2018 }
2019
032a74a1 2020 if (n->has_vnet_hdr) {
feb93f36
JW
2021 if (iov_to_buf(out_sg, out_num, 0, &mhdr, n->guest_hdr_len) <
2022 n->guest_hdr_len) {
fa5e56c2
GK
2023 virtio_error(vdev, "virtio-net header incorrect");
2024 virtqueue_detach_element(q->tx_vq, elem, 0);
2025 g_free(elem);
2026 return -EINVAL;
032a74a1 2027 }
1bfa316c 2028 if (n->needs_vnet_hdr_swap) {
feb93f36
JW
2029 virtio_net_hdr_swap(vdev, (void *) &mhdr);
2030 sg2[0].iov_base = &mhdr;
2031 sg2[0].iov_len = n->guest_hdr_len;
2032 out_num = iov_copy(&sg2[1], ARRAY_SIZE(sg2) - 1,
2033 out_sg, out_num,
2034 n->guest_hdr_len, -1);
2035 if (out_num == VIRTQUEUE_MAX_SIZE) {
2036 goto drop;
7d37435b 2037 }
feb93f36
JW
2038 out_num += 1;
2039 out_sg = sg2;
7d37435b 2040 }
032a74a1 2041 }
14761f9c
MT
2042 /*
2043 * If host wants to see the guest header as is, we can
2044 * pass it on unchanged. Otherwise, copy just the parts
2045 * that host is interested in.
2046 */
2047 assert(n->host_hdr_len <= n->guest_hdr_len);
2048 if (n->host_hdr_len != n->guest_hdr_len) {
2049 unsigned sg_num = iov_copy(sg, ARRAY_SIZE(sg),
2050 out_sg, out_num,
2051 0, n->host_hdr_len);
2052 sg_num += iov_copy(sg + sg_num, ARRAY_SIZE(sg) - sg_num,
2053 out_sg, out_num,
2054 n->guest_hdr_len, -1);
2055 out_num = sg_num;
2056 out_sg = sg;
fbe78f4f
AL
2057 }
2058
fed699f9
JW
2059 ret = qemu_sendv_packet_async(qemu_get_subqueue(n->nic, queue_index),
2060 out_sg, out_num, virtio_net_tx_complete);
6243375f 2061 if (ret == 0) {
0c87e93e
JW
2062 virtio_queue_set_notification(q->tx_vq, 0);
2063 q->async_tx.elem = elem;
e3f30488 2064 return -EBUSY;
6243375f
MM
2065 }
2066
feb93f36 2067drop:
51b19ebe 2068 virtqueue_push(q->tx_vq, elem, 0);
17a0ca55 2069 virtio_notify(vdev, q->tx_vq);
51b19ebe 2070 g_free(elem);
e3f30488
AW
2071
2072 if (++num_packets >= n->tx_burst) {
2073 break;
2074 }
fbe78f4f 2075 }
e3f30488 2076 return num_packets;
fbe78f4f
AL
2077}
2078
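/* TX virtqueue handler for the timer strategy: the first kick arms
 * tx_timer, and a further kick before it fires flushes immediately. */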
a697a334 2079static void virtio_net_handle_tx_timer(VirtIODevice *vdev, VirtQueue *vq)
fbe78f4f 2080{
17a0ca55 2081 VirtIONet *n = VIRTIO_NET(vdev);
fed699f9 2082 VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];
fbe78f4f 2083
283e2c2a
YB
2084 if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) {
2085 virtio_net_drop_tx_queue_data(vdev, vq);
2086 return;
2087 }
2088
783e7706 2089 /* This happens when device was stopped but VCPU wasn't. */
17a0ca55 2090 if (!vdev->vm_running) {
0c87e93e 2091 q->tx_waiting = 1;
783e7706
MT
2092 return;
2093 }
2094
0c87e93e 2095 if (q->tx_waiting) {
fbe78f4f 2096 virtio_queue_set_notification(vq, 1);
bc72ad67 2097 timer_del(q->tx_timer);
0c87e93e 2098 q->tx_waiting = 0;
fa5e56c2
GK
2099 if (virtio_net_flush_tx(q) == -EINVAL) {
2100 return;
2101 }
fbe78f4f 2102 } else {
bc72ad67
AB
2103 timer_mod(q->tx_timer,
2104 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
0c87e93e 2105 q->tx_waiting = 1;
fbe78f4f
AL
2106 virtio_queue_set_notification(vq, 0);
2107 }
2108}
2109
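/* TX virtqueue handler for the bottom-half strategy: disable notification
 * and defer the actual flush to virtio_net_tx_bh. */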
a697a334
AW
2110static void virtio_net_handle_tx_bh(VirtIODevice *vdev, VirtQueue *vq)
2111{
17a0ca55 2112 VirtIONet *n = VIRTIO_NET(vdev);
fed699f9 2113 VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];
a697a334 2114
283e2c2a
YB
2115 if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) {
2116 virtio_net_drop_tx_queue_data(vdev, vq);
2117 return;
2118 }
2119
0c87e93e 2120 if (unlikely(q->tx_waiting)) {
a697a334
AW
2121 return;
2122 }
0c87e93e 2123 q->tx_waiting = 1;
783e7706 2124 /* This happens when device was stopped but VCPU wasn't. */
17a0ca55 2125 if (!vdev->vm_running) {
783e7706
MT
2126 return;
2127 }
a697a334 2128 virtio_queue_set_notification(vq, 0);
0c87e93e 2129 qemu_bh_schedule(q->tx_bh);
a697a334
AW
2130}
2131
fbe78f4f
AL
2132static void virtio_net_tx_timer(void *opaque)
2133{
0c87e93e
JW
2134 VirtIONetQueue *q = opaque;
2135 VirtIONet *n = q->n;
17a0ca55 2136 VirtIODevice *vdev = VIRTIO_DEVICE(n);
e8bcf842
MT
2137 /* This happens when device was stopped but BH wasn't. */
2138 if (!vdev->vm_running) {
2139 /* Make sure tx waiting is set, so we'll run when restarted. */
2140 assert(q->tx_waiting);
2141 return;
2142 }
fbe78f4f 2143
0c87e93e 2144 q->tx_waiting = 0;
fbe78f4f
AL
2145
2146 /* Just in case the driver is not ready any more */
17a0ca55 2147 if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
fbe78f4f 2148 return;
17a0ca55 2149 }
fbe78f4f 2150
0c87e93e
JW
2151 virtio_queue_set_notification(q->tx_vq, 1);
2152 virtio_net_flush_tx(q);
fbe78f4f
AL
2153}
2154
a697a334
AW
2155static void virtio_net_tx_bh(void *opaque)
2156{
0c87e93e
JW
2157 VirtIONetQueue *q = opaque;
2158 VirtIONet *n = q->n;
17a0ca55 2159 VirtIODevice *vdev = VIRTIO_DEVICE(n);
a697a334
AW
2160 int32_t ret;
2161
e8bcf842
MT
2162 /* This happens when device was stopped but BH wasn't. */
2163 if (!vdev->vm_running) {
2164 /* Make sure tx waiting is set, so we'll run when restarted. */
2165 assert(q->tx_waiting);
2166 return;
2167 }
783e7706 2168
0c87e93e 2169 q->tx_waiting = 0;
a697a334
AW
2170
2171 /* Just in case the driver is not ready any more */
17a0ca55 2172 if (unlikely(!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))) {
a697a334 2173 return;
17a0ca55 2174 }
a697a334 2175
0c87e93e 2176 ret = virtio_net_flush_tx(q);
fa5e56c2
GK
2177 if (ret == -EBUSY || ret == -EINVAL) {
2178 return; /* Notification re-enable handled by tx_complete or device
2179 * broken */
a697a334
AW
2180 }
2181
2182 /* If we flush a full burst of packets, assume there are
2183 * more coming and immediately reschedule */
2184 if (ret >= n->tx_burst) {
0c87e93e
JW
2185 qemu_bh_schedule(q->tx_bh);
2186 q->tx_waiting = 1;
a697a334
AW
2187 return;
2188 }
2189
2190 /* If less than a full burst, re-enable notification and flush
2191 * anything that may have come in while we weren't looking. If
2192 * we find something, assume the guest is still active and reschedule */
0c87e93e 2193 virtio_queue_set_notification(q->tx_vq, 1);
fa5e56c2
GK
2194 ret = virtio_net_flush_tx(q);
2195 if (ret == -EINVAL) {
2196 return;
2197 } else if (ret > 0) {
0c87e93e
JW
2198 virtio_queue_set_notification(q->tx_vq, 0);
2199 qemu_bh_schedule(q->tx_bh);
2200 q->tx_waiting = 1;
a697a334
AW
2201 }
2202}
2203
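/* Allocate the RX/TX virtqueue pair for queue 'index' and attach the
 * configured TX strategy (timer or bottom half). */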
f9d6dbf0
WC
2204static void virtio_net_add_queue(VirtIONet *n, int index)
2205{
2206 VirtIODevice *vdev = VIRTIO_DEVICE(n);
2207
1c0fbfa3
MT
2208 n->vqs[index].rx_vq = virtio_add_queue(vdev, n->net_conf.rx_queue_size,
2209 virtio_net_handle_rx);
9b02e161 2210
f9d6dbf0
WC
2211 if (n->net_conf.tx && !strcmp(n->net_conf.tx, "timer")) {
2212 n->vqs[index].tx_vq =
9b02e161
WW
2213 virtio_add_queue(vdev, n->net_conf.tx_queue_size,
2214 virtio_net_handle_tx_timer);
f9d6dbf0
WC
2215 n->vqs[index].tx_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
2216 virtio_net_tx_timer,
2217 &n->vqs[index]);
2218 } else {
2219 n->vqs[index].tx_vq =
9b02e161
WW
2220 virtio_add_queue(vdev, n->net_conf.tx_queue_size,
2221 virtio_net_handle_tx_bh);
f9d6dbf0
WC
2222 n->vqs[index].tx_bh = qemu_bh_new(virtio_net_tx_bh, &n->vqs[index]);
2223 }
2224
2225 n->vqs[index].tx_waiting = 0;
2226 n->vqs[index].n = n;
2227}
2228
2229static void virtio_net_del_queue(VirtIONet *n, int index)
2230{
2231 VirtIODevice *vdev = VIRTIO_DEVICE(n);
2232 VirtIONetQueue *q = &n->vqs[index];
2233 NetClientState *nc = qemu_get_subqueue(n->nic, index);
2234
2235 qemu_purge_queued_packets(nc);
2236
2237 virtio_del_queue(vdev, index * 2);
2238 if (q->tx_timer) {
2239 timer_del(q->tx_timer);
2240 timer_free(q->tx_timer);
f989c30c 2241 q->tx_timer = NULL;
f9d6dbf0
WC
2242 } else {
2243 qemu_bh_delete(q->tx_bh);
f989c30c 2244 q->tx_bh = NULL;
f9d6dbf0 2245 }
f989c30c 2246 q->tx_waiting = 0;
f9d6dbf0
WC
2247 virtio_del_queue(vdev, index * 2 + 1);
2248}
2249
2250static void virtio_net_change_num_queues(VirtIONet *n, int new_max_queues)
2251{
2252 VirtIODevice *vdev = VIRTIO_DEVICE(n);
2253 int old_num_queues = virtio_get_num_queues(vdev);
2254 int new_num_queues = new_max_queues * 2 + 1;
2255 int i;
2256
2257 assert(old_num_queues >= 3);
2258 assert(old_num_queues % 2 == 1);
2259
2260 if (old_num_queues == new_num_queues) {
2261 return;
2262 }
2263
2264 /*
2265 * We always need to remove and add ctrl vq if
2266 * old_num_queues != new_num_queues. Remove ctrl_vq first,
2267 * and then we only enter one of the following two loops.
2268 */
2269 virtio_del_queue(vdev, old_num_queues - 1);
2270
2271 for (i = new_num_queues - 1; i < old_num_queues - 1; i += 2) {
2272 /* new_num_queues < old_num_queues */
2273 virtio_net_del_queue(n, i / 2);
2274 }
2275
2276 for (i = old_num_queues - 1; i < new_num_queues - 1; i += 2) {
2277 /* new_num_queues > old_num_queues */
2278 virtio_net_add_queue(n, i / 2);
2279 }
2280
2281 /* add ctrl_vq last */
2282 n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
2283}
2284
ec57db16 2285static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue)
fed699f9 2286{
f9d6dbf0
WC
2287 int max = multiqueue ? n->max_queues : 1;
2288
fed699f9 2289 n->multiqueue = multiqueue;
f9d6dbf0 2290 virtio_net_change_num_queues(n, max);
fed699f9 2291
fed699f9
JW
2292 virtio_net_set_queues(n);
2293}
2294
982b78c5 2295static int virtio_net_post_load_device(void *opaque, int version_id)
037dab2f 2296{
982b78c5
DDAG
2297 VirtIONet *n = opaque;
2298 VirtIODevice *vdev = VIRTIO_DEVICE(n);
037dab2f 2299 int i, link_down;
fbe78f4f 2300
982b78c5 2301 virtio_net_set_mrg_rx_bufs(n, n->mergeable_rx_bufs,
95129d6f
CH
2302 virtio_vdev_has_feature(vdev,
2303 VIRTIO_F_VERSION_1));
fbe78f4f 2304
76010cb3 2305 /* MAC_TABLE_ENTRIES may be different from the saved image */
982b78c5 2306 if (n->mac_table.in_use > MAC_TABLE_ENTRIES) {
76010cb3 2307 n->mac_table.in_use = 0;
b6503ed9 2308 }
0ce0e8f4 2309
982b78c5 2310 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
6c666823
MT
2311 n->curr_guest_offloads = virtio_net_supported_guest_offloads(n);
2312 }
2313
2314 if (peer_has_vnet_hdr(n)) {
2315 virtio_net_apply_guest_offloads(n);
2316 }
2317
5f800801
JW
2318 virtio_net_set_queues(n);
2319
2d9aba39
AW
2320 /* Find the first multicast entry in the saved MAC filter */
2321 for (i = 0; i < n->mac_table.in_use; i++) {
2322 if (n->mac_table.macs[i * ETH_ALEN] & 1) {
2323 break;
2324 }
2325 }
2326 n->mac_table.first_multi = i;
98991481
AK
2327
2328 /* nc.link_down can't be migrated, so infer link_down according
2329 * to link status bit in n->status */
5f800801
JW
2330 link_down = (n->status & VIRTIO_NET_S_LINK_UP) == 0;
2331 for (i = 0; i < n->max_queues; i++) {
2332 qemu_get_subqueue(n->nic, i)->link_down = link_down;
2333 }
98991481 2334
6c666823
MT
2335 if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) &&
2336 virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
2337 n->announce_counter = SELF_ANNOUNCE_ROUNDS;
2338 timer_mod(n->announce_timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL));
2339 }
2340
fbe78f4f
AL
2341 return 0;
2342}
2343
982b78c5
DDAG
2344/* tx_waiting field of a VirtIONetQueue */
2345static const VMStateDescription vmstate_virtio_net_queue_tx_waiting = {
2346 .name = "virtio-net-queue-tx_waiting",
2347 .fields = (VMStateField[]) {
2348 VMSTATE_UINT32(tx_waiting, VirtIONetQueue),
2349 VMSTATE_END_OF_LIST()
2350 },
2351};
2352
2353static bool max_queues_gt_1(void *opaque, int version_id)
2354{
2355 return VIRTIO_NET(opaque)->max_queues > 1;
2356}
2357
2358static bool has_ctrl_guest_offloads(void *opaque, int version_id)
2359{
2360 return virtio_vdev_has_feature(VIRTIO_DEVICE(opaque),
2361 VIRTIO_NET_F_CTRL_GUEST_OFFLOADS);
2362}
2363
2364static bool mac_table_fits(void *opaque, int version_id)
2365{
2366 return VIRTIO_NET(opaque)->mac_table.in_use <= MAC_TABLE_ENTRIES;
2367}
2368
2369static bool mac_table_doesnt_fit(void *opaque, int version_id)
2370{
2371 return !mac_table_fits(opaque, version_id);
2372}
2373
2374/* This temporary type is shared by all the WITH_TMP methods
2375 * although only some fields are used by each.
2376 */
2377struct VirtIONetMigTmp {
2378 VirtIONet *parent;
2379 VirtIONetQueue *vqs_1;
2380 uint16_t curr_queues_1;
2381 uint8_t has_ufo;
2382 uint32_t has_vnet_hdr;
2383};
2384
2385/* The 2nd and subsequent tx_waiting flags are loaded later than
2386 * the 1st entry in the queues and only if there's more than one
2387 * entry. We use the tmp mechanism to calculate a temporary
2388 * pointer and count and also validate the count.
2389 */
2390
44b1ff31 2391static int virtio_net_tx_waiting_pre_save(void *opaque)
982b78c5
DDAG
2392{
2393 struct VirtIONetMigTmp *tmp = opaque;
2394
2395 tmp->vqs_1 = tmp->parent->vqs + 1;
2396 tmp->curr_queues_1 = tmp->parent->curr_queues - 1;
2397 if (tmp->parent->curr_queues == 0) {
2398 tmp->curr_queues_1 = 0;
2399 }
44b1ff31
DDAG
2400
2401 return 0;
982b78c5
DDAG
2402}
2403
2404static int virtio_net_tx_waiting_pre_load(void *opaque)
2405{
2406 struct VirtIONetMigTmp *tmp = opaque;
2407
2408 /* Reuse the pointer setup from save */
2409 virtio_net_tx_waiting_pre_save(opaque);
2410
2411 if (tmp->parent->curr_queues > tmp->parent->max_queues) {
2412 error_report("virtio-net: curr_queues %x > max_queues %x",
2413 tmp->parent->curr_queues, tmp->parent->max_queues);
2414
2415 return -EINVAL;
2416 }
2417
2418 return 0; /* all good */
2419}
2420
2421static const VMStateDescription vmstate_virtio_net_tx_waiting = {
2422 .name = "virtio-net-tx_waiting",
2423 .pre_load = virtio_net_tx_waiting_pre_load,
2424 .pre_save = virtio_net_tx_waiting_pre_save,
2425 .fields = (VMStateField[]) {
2426 VMSTATE_STRUCT_VARRAY_POINTER_UINT16(vqs_1, struct VirtIONetMigTmp,
2427 curr_queues_1,
2428 vmstate_virtio_net_queue_tx_waiting,
2429 struct VirtIONetQueue),
2430 VMSTATE_END_OF_LIST()
2431 },
2432};
2433
2434/* the 'has_ufo' flag is just tested; if the incoming stream has the
2435 * flag set we need to check that we have it
2436 */
2437static int virtio_net_ufo_post_load(void *opaque, int version_id)
2438{
2439 struct VirtIONetMigTmp *tmp = opaque;
2440
2441 if (tmp->has_ufo && !peer_has_ufo(tmp->parent)) {
2442 error_report("virtio-net: saved image requires TUN_F_UFO support");
2443 return -EINVAL;
2444 }
2445
2446 return 0;
2447}
2448
44b1ff31 2449static int virtio_net_ufo_pre_save(void *opaque)
982b78c5
DDAG
2450{
2451 struct VirtIONetMigTmp *tmp = opaque;
2452
2453 tmp->has_ufo = tmp->parent->has_ufo;
44b1ff31
DDAG
2454
2455 return 0;
982b78c5
DDAG
2456}
2457
2458static const VMStateDescription vmstate_virtio_net_has_ufo = {
2459 .name = "virtio-net-ufo",
2460 .post_load = virtio_net_ufo_post_load,
2461 .pre_save = virtio_net_ufo_pre_save,
2462 .fields = (VMStateField[]) {
2463 VMSTATE_UINT8(has_ufo, struct VirtIONetMigTmp),
2464 VMSTATE_END_OF_LIST()
2465 },
2466};
2467
2468/* the 'has_vnet_hdr' flag is just tested; if the incoming stream has the
2469 * flag set we need to check that we have it
2470 */
2471static int virtio_net_vnet_post_load(void *opaque, int version_id)
2472{
2473 struct VirtIONetMigTmp *tmp = opaque;
2474
2475 if (tmp->has_vnet_hdr && !peer_has_vnet_hdr(tmp->parent)) {
2476 error_report("virtio-net: saved image requires vnet_hdr=on");
2477 return -EINVAL;
2478 }
2479
2480 return 0;
2481}
2482
44b1ff31 2483static int virtio_net_vnet_pre_save(void *opaque)
982b78c5
DDAG
2484{
2485 struct VirtIONetMigTmp *tmp = opaque;
2486
2487 tmp->has_vnet_hdr = tmp->parent->has_vnet_hdr;
44b1ff31
DDAG
2488
2489 return 0;
982b78c5
DDAG
2490}
2491
2492static const VMStateDescription vmstate_virtio_net_has_vnet = {
2493 .name = "virtio-net-vnet",
2494 .post_load = virtio_net_vnet_post_load,
2495 .pre_save = virtio_net_vnet_pre_save,
2496 .fields = (VMStateField[]) {
2497 VMSTATE_UINT32(has_vnet_hdr, struct VirtIONetMigTmp),
2498 VMSTATE_END_OF_LIST()
2499 },
2500};
2501
2502static const VMStateDescription vmstate_virtio_net_device = {
2503 .name = "virtio-net-device",
2504 .version_id = VIRTIO_NET_VM_VERSION,
2505 .minimum_version_id = VIRTIO_NET_VM_VERSION,
2506 .post_load = virtio_net_post_load_device,
2507 .fields = (VMStateField[]) {
2508 VMSTATE_UINT8_ARRAY(mac, VirtIONet, ETH_ALEN),
2509 VMSTATE_STRUCT_POINTER(vqs, VirtIONet,
2510 vmstate_virtio_net_queue_tx_waiting,
2511 VirtIONetQueue),
2512 VMSTATE_UINT32(mergeable_rx_bufs, VirtIONet),
2513 VMSTATE_UINT16(status, VirtIONet),
2514 VMSTATE_UINT8(promisc, VirtIONet),
2515 VMSTATE_UINT8(allmulti, VirtIONet),
2516 VMSTATE_UINT32(mac_table.in_use, VirtIONet),
2517
2518 /* Guarded pair: If it fits we load it, else we throw it away
2519 * - can happen if the source has a larger MAC table; post-load
2520 * sets flags in this case.
2521 */
2522 VMSTATE_VBUFFER_MULTIPLY(mac_table.macs, VirtIONet,
2523 0, mac_table_fits, mac_table.in_use,
2524 ETH_ALEN),
2525 VMSTATE_UNUSED_VARRAY_UINT32(VirtIONet, mac_table_doesnt_fit, 0,
2526 mac_table.in_use, ETH_ALEN),
2527
2528 /* Note: This is an array of uint32's that's always been saved as a
2529 * buffer; hold onto your endiannesses; it's actually used as a bitmap
2530 * but based on the uint.
2531 */
2532 VMSTATE_BUFFER_POINTER_UNSAFE(vlans, VirtIONet, 0, MAX_VLAN >> 3),
2533 VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
2534 vmstate_virtio_net_has_vnet),
2535 VMSTATE_UINT8(mac_table.multi_overflow, VirtIONet),
2536 VMSTATE_UINT8(mac_table.uni_overflow, VirtIONet),
2537 VMSTATE_UINT8(alluni, VirtIONet),
2538 VMSTATE_UINT8(nomulti, VirtIONet),
2539 VMSTATE_UINT8(nouni, VirtIONet),
2540 VMSTATE_UINT8(nobcast, VirtIONet),
2541 VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
2542 vmstate_virtio_net_has_ufo),
2543 VMSTATE_SINGLE_TEST(max_queues, VirtIONet, max_queues_gt_1, 0,
2544 vmstate_info_uint16_equal, uint16_t),
2545 VMSTATE_UINT16_TEST(curr_queues, VirtIONet, max_queues_gt_1),
2546 VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
2547 vmstate_virtio_net_tx_waiting),
2548 VMSTATE_UINT64_TEST(curr_guest_offloads, VirtIONet,
2549 has_ctrl_guest_offloads),
2550 VMSTATE_END_OF_LIST()
2551 },
2552};
2553
eb6b6c12 2554static NetClientInfo net_virtio_info = {
f394b2e2 2555 .type = NET_CLIENT_DRIVER_NIC,
eb6b6c12
MM
2556 .size = sizeof(NICState),
2557 .can_receive = virtio_net_can_receive,
2558 .receive = virtio_net_receive,
eb6b6c12 2559 .link_status_changed = virtio_net_set_link_status,
b1be4280 2560 .query_rx_filter = virtio_net_query_rxfilter,
eb6b6c12
MM
2561};
2562
f56a1247
MT
2563static bool virtio_net_guest_notifier_pending(VirtIODevice *vdev, int idx)
2564{
17a0ca55 2565 VirtIONet *n = VIRTIO_NET(vdev);
fed699f9 2566 NetClientState *nc = qemu_get_subqueue(n->nic, vq2q(idx));
f56a1247 2567 assert(n->vhost_started);
ed8b4afe 2568 return vhost_net_virtqueue_pending(get_vhost_net(nc->peer), idx);
f56a1247
MT
2569}
2570
2571static void virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx,
2572 bool mask)
2573{
17a0ca55 2574 VirtIONet *n = VIRTIO_NET(vdev);
fed699f9 2575 NetClientState *nc = qemu_get_subqueue(n->nic, vq2q(idx));
f56a1247 2576 assert(n->vhost_started);
ed8b4afe 2577 vhost_net_virtqueue_mask(get_vhost_net(nc->peer),
f56a1247
MT
2578 vdev, idx, mask);
2579}
2580
019a3edb 2581static void virtio_net_set_config_size(VirtIONet *n, uint64_t host_features)
fbe78f4f 2582{
14f9b664 2583 int i, config_size = 0;
0cd09c3a 2584 virtio_add_feature(&host_features, VIRTIO_NET_F_MAC);
a93e599d 2585
14f9b664
JL
2586 for (i = 0; feature_sizes[i].flags != 0; i++) {
2587 if (host_features & feature_sizes[i].flags) {
2588 config_size = MAX(feature_sizes[i].end, config_size);
2589 }
2590 }
17ec5a86
FK
2591 n->config_size = config_size;
2592}
2593
8a253ec2
FK
2594void virtio_net_set_netclient_name(VirtIONet *n, const char *name,
2595 const char *type)
2596{
2597 /*
2598 * The name can be NULL; in that case the netclient name will be type.x.
2599 */
2600 assert(type != NULL);
2601
9e288406 2602 g_free(n->netclient_name);
9e288406 2603 g_free(n->netclient_type);
80e0090a 2604 n->netclient_name = g_strdup(name);
8a253ec2
FK
2605 n->netclient_type = g_strdup(type);
2606}
2607
e6f746b3 2608static void virtio_net_device_realize(DeviceState *dev, Error **errp)
17ec5a86 2609{
e6f746b3 2610 VirtIODevice *vdev = VIRTIO_DEVICE(dev);
284a32f0 2611 VirtIONet *n = VIRTIO_NET(dev);
b1be4280 2612 NetClientState *nc;
284a32f0 2613 int i;
1773d9ee 2614
a93e599d 2615 if (n->net_conf.mtu) {
127833ee 2616 n->host_features |= (1ULL << VIRTIO_NET_F_MTU);
a93e599d
MC
2617 }
2618
9473939e
JB
2619 if (n->net_conf.duplex_str) {
2620 if (strncmp(n->net_conf.duplex_str, "half", 5) == 0) {
2621 n->net_conf.duplex = DUPLEX_HALF;
2622 } else if (strncmp(n->net_conf.duplex_str, "full", 5) == 0) {
2623 n->net_conf.duplex = DUPLEX_FULL;
2624 } else {
2625 error_setg(errp, "'duplex' must be 'half' or 'full'");
2626 }
2627 n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX);
2628 } else {
2629 n->net_conf.duplex = DUPLEX_UNKNOWN;
2630 }
2631
2632 if (n->net_conf.speed < SPEED_UNKNOWN) {
2633 error_setg(errp, "'speed' must be between 0 and INT_MAX");
2634 } else if (n->net_conf.speed >= 0) {
2635 n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX);
2636 }
2637
da3e8a23 2638 virtio_net_set_config_size(n, n->host_features);
284a32f0 2639 virtio_init(vdev, "virtio-net", VIRTIO_ID_NET, n->config_size);
fbe78f4f 2640
1c0fbfa3
MT
2641 /*
2642 * We set a lower limit on RX queue size to what it always was.
2643 * Guests that want a smaller ring can always resize it without
2644 * help from us (using virtio 1 and up).
2645 */
2646 if (n->net_conf.rx_queue_size < VIRTIO_NET_RX_QUEUE_MIN_SIZE ||
2647 n->net_conf.rx_queue_size > VIRTQUEUE_MAX_SIZE ||
5f997fd1 2648 !is_power_of_2(n->net_conf.rx_queue_size)) {
1c0fbfa3
MT
2649 error_setg(errp, "Invalid rx_queue_size (= %" PRIu16 "), "
2650 "must be a power of 2 between %d and %d.",
2651 n->net_conf.rx_queue_size, VIRTIO_NET_RX_QUEUE_MIN_SIZE,
2652 VIRTQUEUE_MAX_SIZE);
2653 virtio_cleanup(vdev);
2654 return;
2655 }
2656
9b02e161
WW
2657 if (n->net_conf.tx_queue_size < VIRTIO_NET_TX_QUEUE_MIN_SIZE ||
2658 n->net_conf.tx_queue_size > VIRTQUEUE_MAX_SIZE ||
2659 !is_power_of_2(n->net_conf.tx_queue_size)) {
2660 error_setg(errp, "Invalid tx_queue_size (= %" PRIu16 "), "
2661 "must be a power of 2 between %d and %d",
2662 n->net_conf.tx_queue_size, VIRTIO_NET_TX_QUEUE_MIN_SIZE,
2663 VIRTQUEUE_MAX_SIZE);
2664 virtio_cleanup(vdev);
2665 return;
2666 }
2667
575a1c0e 2668 n->max_queues = MAX(n->nic_conf.peers.queues, 1);
87b3bd1c 2669 if (n->max_queues * 2 + 1 > VIRTIO_QUEUE_MAX) {
7e0e736e 2670 error_setg(errp, "Invalid number of queues (= %" PRIu32 "), "
631b22ea 2671 "must be a positive integer less than %d.",
87b3bd1c 2672 n->max_queues, (VIRTIO_QUEUE_MAX - 1) / 2);
7e0e736e
JW
2673 virtio_cleanup(vdev);
2674 return;
2675 }
f6b26cf2 2676 n->vqs = g_malloc0(sizeof(VirtIONetQueue) * n->max_queues);
fed699f9 2677 n->curr_queues = 1;
1773d9ee 2678 n->tx_timeout = n->net_conf.txtimer;
a697a334 2679
1773d9ee
FK
2680 if (n->net_conf.tx && strcmp(n->net_conf.tx, "timer")
2681 && strcmp(n->net_conf.tx, "bh")) {
0765691e
MA
2682 warn_report("virtio-net: "
2683 "Unknown option tx=%s, valid options: \"timer\" \"bh\"",
2684 n->net_conf.tx);
2685 error_printf("Defaulting to \"bh\"");
a697a334
AW
2686 }
2687
2eef278b
MT
2688 n->net_conf.tx_queue_size = MIN(virtio_net_max_tx_queue_size(n),
2689 n->net_conf.tx_queue_size);
9b02e161 2690
da51a335 2691 for (i = 0; i < n->max_queues; i++) {
f9d6dbf0 2692 virtio_net_add_queue(n, i);
a697a334 2693 }
da51a335 2694
17a0ca55 2695 n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
1773d9ee
FK
2696 qemu_macaddr_default_if_unset(&n->nic_conf.macaddr);
2697 memcpy(&n->mac[0], &n->nic_conf.macaddr, sizeof(n->mac));
554c97dd 2698 n->status = VIRTIO_NET_S_LINK_UP;
f57fcf70
JW
2699 n->announce_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL,
2700 virtio_net_announce_timer, n);
fbe78f4f 2701
8a253ec2
FK
2702 if (n->netclient_type) {
2703 /*
2704 * Happens when virtio_net_set_netclient_name has been called.
2705 */
2706 n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
2707 n->netclient_type, n->netclient_name, n);
2708 } else {
2709 n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
284a32f0 2710 object_get_typename(OBJECT(dev)), dev->id, n);
8a253ec2
FK
2711 }
2712
6e371ab8
MT
2713 peer_test_vnet_hdr(n);
2714 if (peer_has_vnet_hdr(n)) {
fed699f9 2715 for (i = 0; i < n->max_queues; i++) {
d6085e3a 2716 qemu_using_vnet_hdr(qemu_get_subqueue(n->nic, i)->peer, true);
fed699f9 2717 }
6e371ab8
MT
2718 n->host_hdr_len = sizeof(struct virtio_net_hdr);
2719 } else {
2720 n->host_hdr_len = 0;
2721 }
eb6b6c12 2722
1773d9ee 2723 qemu_format_nic_info_str(qemu_get_queue(n->nic), n->nic_conf.macaddr.a);
96d5e201 2724
fed699f9 2725 n->vqs[0].tx_waiting = 0;
1773d9ee 2726 n->tx_burst = n->net_conf.txburst;
bb9d17f8 2727 virtio_net_set_mrg_rx_bufs(n, 0, 0);
002437cd 2728 n->promisc = 1; /* for compatibility */
fbe78f4f 2729
7267c094 2730 n->mac_table.macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);
b6503ed9 2731
7267c094 2732 n->vlans = g_malloc0(MAX_VLAN >> 3);
f21c0ed9 2733
b1be4280
AK
2734 nc = qemu_get_queue(n->nic);
2735 nc->rxfilter_notify_enabled = 1;
2736
2974e916 2737 QTAILQ_INIT(&n->rsc_chains);
284a32f0 2738 n->qdev = dev;
17ec5a86
FK
2739}
2740
306ec6c3 2741static void virtio_net_device_unrealize(DeviceState *dev, Error **errp)
17ec5a86 2742{
306ec6c3
AF
2743 VirtIODevice *vdev = VIRTIO_DEVICE(dev);
2744 VirtIONet *n = VIRTIO_NET(dev);
f9d6dbf0 2745 int i, max_queues;
17ec5a86
FK
2746
2747 /* This will stop vhost backend if appropriate. */
2748 virtio_net_set_status(vdev, 0);
2749
9e288406
MA
2750 g_free(n->netclient_name);
2751 n->netclient_name = NULL;
2752 g_free(n->netclient_type);
2753 n->netclient_type = NULL;
8a253ec2 2754
17ec5a86
FK
2755 g_free(n->mac_table.macs);
2756 g_free(n->vlans);
2757
f9d6dbf0
WC
2758 max_queues = n->multiqueue ? n->max_queues : 1;
2759 for (i = 0; i < max_queues; i++) {
2760 virtio_net_del_queue(n, i);
17ec5a86
FK
2761 }
2762
f57fcf70
JW
2763 timer_del(n->announce_timer);
2764 timer_free(n->announce_timer);
17ec5a86
FK
2765 g_free(n->vqs);
2766 qemu_del_nic(n->nic);
2974e916 2767 virtio_net_rsc_cleanup(n);
6a1a8cc7 2768 virtio_cleanup(vdev);
17ec5a86
FK
2769}
2770
2771static void virtio_net_instance_init(Object *obj)
2772{
2773 VirtIONet *n = VIRTIO_NET(obj);
2774
2775 /*
2776 * The default config_size is sizeof(struct virtio_net_config).
2777 * Can be overridden with virtio_net_set_config_size.
2778 */
2779 n->config_size = sizeof(struct virtio_net_config);
aa4197c3
GA
2780 device_add_bootindex_property(obj, &n->nic_conf.bootindex,
2781 "bootindex", "/ethernet-phy@0",
2782 DEVICE(n), NULL);
17ec5a86
FK
2783}
2784
44b1ff31 2785static int virtio_net_pre_save(void *opaque)
4d45dcfb
HP
2786{
2787 VirtIONet *n = opaque;
2788
2789 /* At this point, backend must be stopped, otherwise
2790 * it might keep writing to memory. */
2791 assert(!n->vhost_started);
44b1ff31
DDAG
2792
2793 return 0;
4d45dcfb
HP
2794}
2795
2796static const VMStateDescription vmstate_virtio_net = {
2797 .name = "virtio-net",
2798 .minimum_version_id = VIRTIO_NET_VM_VERSION,
2799 .version_id = VIRTIO_NET_VM_VERSION,
2800 .fields = (VMStateField[]) {
2801 VMSTATE_VIRTIO_DEVICE,
2802 VMSTATE_END_OF_LIST()
2803 },
2804 .pre_save = virtio_net_pre_save,
2805};
290c2428 2806
17ec5a86 2807static Property virtio_net_properties[] = {
127833ee
JB
2808 DEFINE_PROP_BIT64("csum", VirtIONet, host_features,
2809 VIRTIO_NET_F_CSUM, true),
2810 DEFINE_PROP_BIT64("guest_csum", VirtIONet, host_features,
87108bb2 2811 VIRTIO_NET_F_GUEST_CSUM, true),
127833ee
JB
2812 DEFINE_PROP_BIT64("gso", VirtIONet, host_features, VIRTIO_NET_F_GSO, true),
2813 DEFINE_PROP_BIT64("guest_tso4", VirtIONet, host_features,
87108bb2 2814 VIRTIO_NET_F_GUEST_TSO4, true),
127833ee 2815 DEFINE_PROP_BIT64("guest_tso6", VirtIONet, host_features,
87108bb2 2816 VIRTIO_NET_F_GUEST_TSO6, true),
127833ee 2817 DEFINE_PROP_BIT64("guest_ecn", VirtIONet, host_features,
87108bb2 2818 VIRTIO_NET_F_GUEST_ECN, true),
127833ee 2819 DEFINE_PROP_BIT64("guest_ufo", VirtIONet, host_features,
87108bb2 2820 VIRTIO_NET_F_GUEST_UFO, true),
127833ee 2821 DEFINE_PROP_BIT64("guest_announce", VirtIONet, host_features,
87108bb2 2822 VIRTIO_NET_F_GUEST_ANNOUNCE, true),
127833ee 2823 DEFINE_PROP_BIT64("host_tso4", VirtIONet, host_features,
87108bb2 2824 VIRTIO_NET_F_HOST_TSO4, true),
127833ee 2825 DEFINE_PROP_BIT64("host_tso6", VirtIONet, host_features,
87108bb2 2826 VIRTIO_NET_F_HOST_TSO6, true),
127833ee 2827 DEFINE_PROP_BIT64("host_ecn", VirtIONet, host_features,
87108bb2 2828 VIRTIO_NET_F_HOST_ECN, true),
127833ee 2829 DEFINE_PROP_BIT64("host_ufo", VirtIONet, host_features,
87108bb2 2830 VIRTIO_NET_F_HOST_UFO, true),
127833ee 2831 DEFINE_PROP_BIT64("mrg_rxbuf", VirtIONet, host_features,
87108bb2 2832 VIRTIO_NET_F_MRG_RXBUF, true),
127833ee 2833 DEFINE_PROP_BIT64("status", VirtIONet, host_features,
87108bb2 2834 VIRTIO_NET_F_STATUS, true),
127833ee 2835 DEFINE_PROP_BIT64("ctrl_vq", VirtIONet, host_features,
87108bb2 2836 VIRTIO_NET_F_CTRL_VQ, true),
127833ee 2837 DEFINE_PROP_BIT64("ctrl_rx", VirtIONet, host_features,
87108bb2 2838 VIRTIO_NET_F_CTRL_RX, true),
127833ee 2839 DEFINE_PROP_BIT64("ctrl_vlan", VirtIONet, host_features,
87108bb2 2840 VIRTIO_NET_F_CTRL_VLAN, true),
127833ee 2841 DEFINE_PROP_BIT64("ctrl_rx_extra", VirtIONet, host_features,
87108bb2 2842 VIRTIO_NET_F_CTRL_RX_EXTRA, true),
127833ee 2843 DEFINE_PROP_BIT64("ctrl_mac_addr", VirtIONet, host_features,
87108bb2 2844 VIRTIO_NET_F_CTRL_MAC_ADDR, true),
127833ee 2845 DEFINE_PROP_BIT64("ctrl_guest_offloads", VirtIONet, host_features,
87108bb2 2846 VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, true),
127833ee 2847 DEFINE_PROP_BIT64("mq", VirtIONet, host_features, VIRTIO_NET_F_MQ, false),
2974e916
YB
2848 DEFINE_PROP_BIT64("guest_rsc_ext", VirtIONet, host_features,
2849 VIRTIO_NET_F_RSC_EXT, false),
2850 DEFINE_PROP_UINT32("rsc_interval", VirtIONet, rsc_timeout,
2851 VIRTIO_NET_RSC_DEFAULT_INTERVAL),
17ec5a86
FK
2852 DEFINE_NIC_PROPERTIES(VirtIONet, nic_conf),
2853 DEFINE_PROP_UINT32("x-txtimer", VirtIONet, net_conf.txtimer,
87108bb2 2854 TX_TIMER_INTERVAL),
17ec5a86
FK
2855 DEFINE_PROP_INT32("x-txburst", VirtIONet, net_conf.txburst, TX_BURST),
2856 DEFINE_PROP_STRING("tx", VirtIONet, net_conf.tx),
1c0fbfa3
MT
2857 DEFINE_PROP_UINT16("rx_queue_size", VirtIONet, net_conf.rx_queue_size,
2858 VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE),
9b02e161
WW
2859 DEFINE_PROP_UINT16("tx_queue_size", VirtIONet, net_conf.tx_queue_size,
2860 VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE),
a93e599d 2861 DEFINE_PROP_UINT16("host_mtu", VirtIONet, net_conf.mtu, 0),
75ebec11
MC
2862 DEFINE_PROP_BOOL("x-mtu-bypass-backend", VirtIONet, mtu_bypass_backend,
2863 true),
9473939e
JB
2864 DEFINE_PROP_INT32("speed", VirtIONet, net_conf.speed, SPEED_UNKNOWN),
2865 DEFINE_PROP_STRING("duplex", VirtIONet, net_conf.duplex_str),
17ec5a86
FK
2866 DEFINE_PROP_END_OF_LIST(),
2867};
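/* Illustrative usage, not part of the original source: assuming the usual
 * virtio-net-pci frontend, the RSC knobs above could be enabled with e.g.
 * "-device virtio-net-pci,guest_rsc_ext=on,rsc_interval=300000". */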
2868
2869static void virtio_net_class_init(ObjectClass *klass, void *data)
2870{
2871 DeviceClass *dc = DEVICE_CLASS(klass);
2872 VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
e6f746b3 2873
17ec5a86 2874 dc->props = virtio_net_properties;
290c2428 2875 dc->vmsd = &vmstate_virtio_net;
125ee0ed 2876 set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
e6f746b3 2877 vdc->realize = virtio_net_device_realize;
306ec6c3 2878 vdc->unrealize = virtio_net_device_unrealize;
17ec5a86
FK
2879 vdc->get_config = virtio_net_get_config;
2880 vdc->set_config = virtio_net_set_config;
2881 vdc->get_features = virtio_net_get_features;
2882 vdc->set_features = virtio_net_set_features;
2883 vdc->bad_features = virtio_net_bad_features;
2884 vdc->reset = virtio_net_reset;
2885 vdc->set_status = virtio_net_set_status;
2886 vdc->guest_notifier_mask = virtio_net_guest_notifier_mask;
2887 vdc->guest_notifier_pending = virtio_net_guest_notifier_pending;
2a083ffd 2888 vdc->legacy_features |= (0x1 << VIRTIO_NET_F_GSO);
982b78c5 2889 vdc->vmsd = &vmstate_virtio_net_device;
17ec5a86
FK
2890}
2891
2892static const TypeInfo virtio_net_info = {
2893 .name = TYPE_VIRTIO_NET,
2894 .parent = TYPE_VIRTIO_DEVICE,
2895 .instance_size = sizeof(VirtIONet),
2896 .instance_init = virtio_net_instance_init,
2897 .class_init = virtio_net_class_init,
2898};
2899
2900static void virtio_register_types(void)
2901{
2902 type_register_static(&virtio_net_info);
2903}
2904
2905type_init(virtio_register_types)