+Tue Jan 27 10:48:12 IST 2009 Mark McLoughlin <markmc@redhat.com>
+
+ kvm/virtio: Set IFF_VNET_HDR when setting up tap fds
+
+ IFF_VNET_HDR is a tun/tap flag that allows you to send and receive
+ large (i.e. GSO) packets and packets with partial checksums. Setting
+ the flag means that every packet is preceded by the same header which
+ virtio uses to communicate GSO/csum metadata.
+
+ By enabling this flag on the tap fds we create, we greatly increase
+ the achievable throughput with virtio_net.
+
+ However, we need to be careful to only set the flag when a) QEMU has
+ support for this ABI and b) the value of the flag is queryable using
+ the TUNGETIFF ioctl.
+
+ It's nearly five months since kvm-74 - the first KVM release with this
+ feature - was released. Up until now, we've not added libvirt support
+ because there is no clean way to detect support for this in QEMU at
+ runtime. A brief attempt to add an "info capabilities" monitor command
+ to QEMU floundered. Perfect is the enemy of good enough. Probing the
+ KVM version will suffice for now.
+
+ * src/qemu_conf.[ch] (qemudExtractVersionInfo): detect the KVM version,
+ set QEMUD_CMD_FLAG_VNET_HDR if we have kvm-74 or newer
+ (qemudBuildCommandLine): if qemu supports VNET_HDR and this is a
+ virtio interface, then use it
+
+ * src/bridge.[ch] (brProbeVnetHdr): only enable IFF_VNET_HDR if
+ the kernel bits are available
+ (brAddTap): add a @vnet_hdr arg, set IFF_VNET_HDR
+
Tue Jan 27 11:53:32 +0100 2009 Jim Meyering <meyering@redhat.com>
and Daniel P. Berrange <berrange@redhat.com>
#include "internal.h"
#include "memory.h"
#include "util.h"
+#include "logging.h"
#define MAX_BRIDGE_ID 256
return ifSetMtu(ctl, ifname, mtu);
}
+/**
+ * brProbeVnetHdr:
+ * @tapfd: a tun/tap file descriptor
+ *
+ * Check whether it is safe to enable the IFF_VNET_HDR flag on the
+ * tap interface.
+ *
+ * Setting IFF_VNET_HDR enables QEMU's virtio_net driver to allow
+ * guests to pass larger (GSO) packets, with partial checksums, to
+ * the host. This greatly increases the achievable throughput.
+ *
+ * It is only useful to enable this when we're setting up a virtio
+ * interface. And it is only *safe* to enable it when we know for
+ * sure that a) qemu has support for IFF_VNET_HDR and b) the running
+ * kernel implements the TUNGETIFF ioctl(), which qemu needs to query
+ * the supplied tapfd.
+ *
+ * This function only checks the kernel side (b) by issuing ioctls on
+ * @tapfd; whether qemu supports the flag is not examined here.
+ *
+ * The reason for each outcome is logged via VIR_INFO0(). No error is
+ * reported to the caller: a failed probe simply yields 0.
+ *
+ * Returns 1 if IFF_VNET_HDR may be enabled on @tapfd, 0 otherwise
+ * (including when the probe was disabled at build time).
+ */
+static int
+brProbeVnetHdr(int tapfd)
+{
+ /* The probe is only compiled in when the build headers define the
+ * flag and both ioctl request codes.
+ */
+#if defined(IFF_VNET_HDR) && defined(TUNGETFEATURES) && defined(TUNGETIFF)
+ unsigned int features;
+ /* Only passed as the TUNGETIFF argument; never read afterwards. */
+ struct ifreq dummy;
+
+ /* Any failure of TUNGETFEATURES is treated as "not implemented". */
+ if (ioctl(tapfd, TUNGETFEATURES, &features) != 0) {
+ VIR_INFO0(_("Not enabling IFF_VNET_HDR; "
+ "TUNGETFEATURES ioctl() not implemented"));
+ return 0;
+ }
+
+ /* The ioctl worked, but IFF_VNET_HDR is not among the reported bits. */
+ if (!(features & IFF_VNET_HDR)) {
+ VIR_INFO0(_("Not enabling IFF_VNET_HDR; "
+ "TUNGETFEATURES ioctl() reports no IFF_VNET_HDR"));
+ return 0;
+ }
+
+ /* The ioctl is expected to fail with -1 at this point. The probe
+ * passes only if errno == EBADFD; a successful call or any other
+ * errno is treated as TUNGETIFF not being implemented.
+ * NOTE(review): confirm against the kernel's tun driver that EBADFD
+ * really indicates TUNGETIFF is implemented.
+ */
+ if (ioctl(tapfd, TUNGETIFF, &dummy) != -1 || errno != EBADFD) {
+ VIR_INFO0(_("Not enabling IFF_VNET_HDR; "
+ "TUNGETIFF ioctl() not implemented"));
+ return 0;
+ }
+
+ VIR_INFO0(_("Enabling IFF_VNET_HDR"));
+
+ return 1;
+#else
+ /* Build headers lack the flag or one of the ioctls: never enable. */
+ VIR_INFO0(_("Not enabling IFF_VNET_HDR; disabled at build time"));
+ return 0;
+#endif
+}
+
/**
* brAddTap:
* @ctl: bridge control pointer
* @bridge: the bridge name
* @ifname: the interface name (or name template)
+ * @vnet_hdr: whether to try enabling IFF_VNET_HDR
* @tapfd: file descriptor return value for the new tap device
*
* This function creates a new tap device on a bridge. @ifname can be either
brAddTap(brControl *ctl,
const char *bridge,
char **ifname,
+ int vnet_hdr,
int *tapfd)
{
int id, subst, fd;
if ((fd = open("/dev/net/tun", O_RDWR)) < 0)
return errno;
+ if (vnet_hdr)
+ vnet_hdr = brProbeVnetHdr(fd);
+
do {
struct ifreq try;
int len;
try.ifr_flags = IFF_TAP|IFF_NO_PI;
+#ifdef IFF_VNET_HDR
+ if (vnet_hdr)
+ try.ifr_flags |= IFF_VNET_HDR;
+#endif
+
if (subst) {
len = snprintf(try.ifr_name, BR_IFNAME_MAXLEN, *ifname, id);
if (len >= BR_IFNAME_MAXLEN) {
int brAddTap (brControl *ctl,
const char *bridge,
char **ifname,
+ int vnet_hdr,
int *tapfd);
int brSetInterfaceUp (brControl *ctl,
int newstdout = -1;
int ret = -1, status;
unsigned int major, minor, micro;
- unsigned int version;
+ unsigned int version, kvm_version;
unsigned int flags = 0;
if (retflags)
if (len < 0)
goto cleanup2;
- if (sscanf(help, "QEMU PC emulator version %u.%u.%u",
- &major, &minor, &micro) != 3) {
+ if (sscanf(help, "QEMU PC emulator version %u.%u.%u (kvm-%u)",
+ &major, &minor, &micro, &kvm_version) != 4)
+ kvm_version = 0;
+
+ if (!kvm_version && sscanf(help, "QEMU PC emulator version %u.%u.%u",
+ &major, &minor, &micro) != 3)
goto cleanup2;
- }
version = (major * 1000 * 1000) + (minor * 1000) + micro;
flags |= QEMUD_CMD_FLAG_DRIVE_BOOT;
if (version >= 9000)
flags |= QEMUD_CMD_FLAG_VNC_COLON;
+ if (kvm_version >= 74)
+ flags |= QEMUD_CMD_FLAG_VNET_HDR;
if (retversion)
*retversion = version;
qemudDebug("Version %d %d %d Cooked version: %d, with flags ? %d",
major, minor, micro, version, flags);
+ if (kvm_version)
+ qemudDebug("KVM version %d detected", kvm_version);
cleanup2:
VIR_FREE(help);
int **tapfds,
int *ntapfds,
virDomainNetDefPtr net,
- int vlan)
+ int vlan,
+ int vnet_hdr)
{
char *brname;
char tapfdstr[4+3+32+7];
}
if ((err = brAddTap(driver->brctl, brname,
- &net->ifname, &tapfd))) {
+ &net->ifname, vnet_hdr, &tapfd))) {
if (errno == ENOTSUP) {
/* In this particular case, give a better diagnostic. */
qemudReportError(conn, NULL, NULL, VIR_ERR_INTERNAL_ERROR,
case VIR_DOMAIN_NET_TYPE_NETWORK:
case VIR_DOMAIN_NET_TYPE_BRIDGE:
{
- char *tap = qemudNetworkIfaceConnect(conn, driver,
- tapfds, ntapfds,
- net, vlan);
+ char *tap;
+ int vnet_hdr = 0;
+
+ if (qemuCmdFlags & QEMUD_CMD_FLAG_VNET_HDR &&
+ net->model && STREQ(net->model, "virtio"))
+ vnet_hdr = 1;
+
+ tap = qemudNetworkIfaceConnect(conn, driver,
+ tapfds, ntapfds,
+ net, vlan, vnet_hdr);
if (tap == NULL)
goto error;
ADD_ARG(tap);
QEMUD_CMD_FLAG_NAME = (1 << 5),
QEMUD_CMD_FLAG_UUID = (1 << 6),
QEMUD_CMD_FLAG_DOMID = (1 << 7), /* Xenner only */
+ QEMUD_CMD_FLAG_VNET_HDR = (1 << 8),
};
/* Main driver state */