]> git.ipfire.org Git - thirdparty/libvirt.git/commitdiff
network: add tc filter rule to nftables backend to fix checksum of DHCP responses
authorLaine Stump <laine@redhat.com>
Tue, 26 Nov 2024 03:24:49 +0000 (22:24 -0500)
committerMichal Privoznik <mprivozn@redhat.com>
Tue, 26 Nov 2024 13:36:14 +0000 (14:36 +0100)
Please see the commit log for commit v10.9.0-rc1-1-g42ab0148dd for the
history and explanation of the problem that this patch is fixing.

A shorter explanation is that when a guest is connected to a libvirt
virtual network using a virtio-net adapter with in-kernel "vhost-net"
packet processing enabled, it will fail to acquire an IP address from
a DHCP seever running on the host.

In commit v10.9.0-rc1-1-g42ab0148dd we tried fixing this by *zeroing
out* the checksums of these packets with an nftables rule (nftables
can't recompute the checksum, but it can set it to 0) . This
*appeared* to work initially, but it turned out that zeroing the
checksum ends up breaking dhcp packets on *non* virtio/vhost-net guest
interfaces. That attempt was reverted in commit v10.9.0-rc2.

Fortunately, there is an existing way to recompute the checksum of a
packet as it leaves an interface - the "tc" (traffic control) utility
that libvirt already uses for bandwidth management. This patch uses a
tc filter rule to match dhcp response packets on the bridge and
recompute their checksum.

The filter rule must be attached to a tc qdisc, which may also have a
filter attached for bandwidth management (in the <bandwidth> element
of the network config). Not only must we add the qdisc only once
(which was already handled by the patch two prior to this one), but
also the filter rule for checksum fixing and the filter rule for
bandwidth management must be different priorities so they don't clash;
this is solved by adding the checksum-fix filter with "priority 2",
while the bandwidth management filter remains "priority 1" (both will
always be evaluated anyway, it's just a matter of which is evaluated
first).

So far this method has worked with every different guest we could
throw at it, including several that failed with the previous method.

Fixes: b89c4991daa0ee9371f10937fab3b03c5ffdabc6
Reported-by: Rich Jones <rjones@redhat.com>
Reported-by: Andrea Bolognani <abologna@redhat.com>
Fix-Suggested-by: Eric Garver <egarver@redhat.com>
Fix-Suggested-by: Phil Sutter <psutter@redhat.com>
Signed-off-by: Laine Stump <laine@redhat.com>
Reviewed-by: Michal Privoznik <mprivozn@redhat.com>
12 files changed:
src/network/network_nftables.c
tests/networkxml2firewalldata/forward-dev-linux.nftables
tests/networkxml2firewalldata/isolated-linux.nftables
tests/networkxml2firewalldata/nat-default-linux.nftables
tests/networkxml2firewalldata/nat-ipv6-linux.nftables
tests/networkxml2firewalldata/nat-ipv6-masquerade-linux.nftables
tests/networkxml2firewalldata/nat-many-ips-linux.nftables
tests/networkxml2firewalldata/nat-no-dhcp-linux.nftables
tests/networkxml2firewalldata/nat-port-range-ipv6-linux.nftables
tests/networkxml2firewalldata/nat-port-range-linux.nftables
tests/networkxml2firewalldata/nat-tftp-linux.nftables
tests/networkxml2firewalldata/route-default-linux.nftables

index b3605bd40e967f00c940bc693011019a58c3c837..5d716264bf5f5cd6661994eb1fd289895d09661e 100644 (file)
@@ -29,6 +29,7 @@
 
 #include "internal.h"
 #include "virfirewalld.h"
+#include "vircommand.h"
 #include "virerror.h"
 #include "virlog.h"
 #include "virhash.h"
@@ -924,6 +925,67 @@ nftablesAddIPSpecificFirewallRules(virFirewall *fw,
 }
 
 
+/**
+ * nftablesAddUdpChecksumFixWithTC:
+ *
+ * Add a tc filter rule to @ifname (the bridge device of this network)
+ * that will recompute the checksum of udp packets output from @iface with
+ * destination port @port.
+ *
+ * Normally the checksum should be filled by some part of the basic
+ * network stack, but there are cases (e.g. DHCP response packets sent
+ * from virtualization host to a QEMU guest when the guest NIC uses
+ * vhost-net packet processing) when the host (sender) thinks that
+ * packet checksums will be computed elsewhere (and so leaves a
+ * partially computed checksum in the packet header) while the guest
+ * (receiver) thinks that the checksum has already been fully
+ * computed; in the meantime none of the code in between has actually
+ * finished computing the checksum.
+ *
+ * An example of this is DHCP response packets from host to guest. If
+ * the checksum of each of these packets isn't properly computed, then
+ * many guests (e.g. FreeBSD) will drop them with reason BAD CHECKSUM;
+ * this tc filter rule will fix the ip and udp checksums, and the
+ * FreeBSD dhcp client will happily accept the packet.
+ *
+ * (NB: if you're wondering how the tc qdisc and filter are removed
+ * when the network is destroyed, the answer is that the kernel
+ * automatically (and properly) removes them for us, so we don't need
+ * to worry about keeping track/deleting as we do with nftables rules)
+ */
+static int
+nftablesAddUdpChecksumFixWithTC(virFirewall *fw,
+                                const char *iface,
+                                int port)
+{
+    g_autofree char *portstr = g_strdup_printf("%d", port);
+
+    /* this will add the qdisc (that the filter below is attached to)
+     * unless it already exists
+     */
+    if (virNetDevBandWidthAddTxFilterParentQdisc(iface, true) < 0)
+        return -1;
+
+    /* add a filter to catch all udp packets with dst "port" and
+     * recompute their checksum
+     */
+    virFirewallAddCmd(fw, VIR_FIREWALL_LAYER_TC,
+                      "filter", "add", "dev", iface,
+                      "prio", "2", "protocol", "ip", "parent", "1:",
+                      "u32", "match", "ip", "dport", portstr, "ffff",
+                      "action", "csum", "ip", "and", "udp",
+                      NULL);
+
+    virFirewallAddRollbackCmd(fw, VIR_FIREWALL_LAYER_TC,
+                              "filter", "del", "dev", iface,
+                              "prio", "2", "protocol", "ip", "parent", "1:",
+                              "u32", "match", "ip", "dport", portstr, "ffff",
+                              "action", "csum", "ip", "and", "udp",
+                              NULL);
+    return 0;
+}
+
+
 /* nftablesAddFirewallrules:
  *
  * @def - the network that needs an nftables firewall added
@@ -944,6 +1006,12 @@ nftablesAddFirewallRules(virNetworkDef *def, virFirewall **fwRemoval)
 
     virFirewallStartTransaction(fw, VIR_FIREWALL_TRANSACTION_AUTO_ROLLBACK);
 
+    /* add the tc filter rule needed to fixup the checksum of dhcp
+     * response packets going from host to guest.
+     */
+    if (nftablesAddUdpChecksumFixWithTC(fw, def->bridge, 68) < 0)
+        return -1;
+
     nftablesAddGeneralFirewallRules(fw, def);
 
     for (i = 0;
index 8badb74bebec70293f4277f24638de506f7c5b0b..6772383b37f5faabd08c7e8a2637767fd292f646 100644 (file)
@@ -1,3 +1,43 @@
+tc \
+qdisc \
+show \
+dev \
+virbr0 \
+handle \
+1:
+tc \
+qdisc \
+add \
+dev \
+virbr0 \
+root \
+handle \
+1: \
+htb \
+default \
+2
+tc \
+filter \
+add \
+dev \
+virbr0 \
+prio \
+2 \
+protocol \
+ip \
+parent \
+1: \
+u32 \
+match \
+ip \
+dport \
+68 \
+ffff \
+action \
+csum \
+ip \
+and \
+udp
 nft \
 -ae insert \
 rule \
index d1b4dac178368d17e3283092fee3fc5d7d7dc20f..546a18b75aa9b979f515f7503a35f1ef6af487b8 100644 (file)
@@ -1,3 +1,43 @@
+tc \
+qdisc \
+show \
+dev \
+virbr0 \
+handle \
+1:
+tc \
+qdisc \
+add \
+dev \
+virbr0 \
+root \
+handle \
+1: \
+htb \
+default \
+2
+tc \
+filter \
+add \
+dev \
+virbr0 \
+prio \
+2 \
+protocol \
+ip \
+parent \
+1: \
+u32 \
+match \
+ip \
+dport \
+68 \
+ffff \
+action \
+csum \
+ip \
+and \
+udp
 nft \
 -ae insert \
 rule \
index 28508292f9efe44100077972853ba67fa0d7001e..08623c138189aff57f0f3e921c7976cc4ff0fdde 100644 (file)
@@ -1,3 +1,43 @@
+tc \
+qdisc \
+show \
+dev \
+virbr0 \
+handle \
+1:
+tc \
+qdisc \
+add \
+dev \
+virbr0 \
+root \
+handle \
+1: \
+htb \
+default \
+2
+tc \
+filter \
+add \
+dev \
+virbr0 \
+prio \
+2 \
+protocol \
+ip \
+parent \
+1: \
+u32 \
+match \
+ip \
+dport \
+68 \
+ffff \
+action \
+csum \
+ip \
+and \
+udp
 nft \
 -ae insert \
 rule \
index d8a9ba706d8b92b1ef4b9c2e36aa8c8fbba88760..3fd6b94eef9b66ceec894f2fbacd2485204caad6 100644 (file)
@@ -1,3 +1,43 @@
+tc \
+qdisc \
+show \
+dev \
+virbr0 \
+handle \
+1:
+tc \
+qdisc \
+add \
+dev \
+virbr0 \
+root \
+handle \
+1: \
+htb \
+default \
+2
+tc \
+filter \
+add \
+dev \
+virbr0 \
+prio \
+2 \
+protocol \
+ip \
+parent \
+1: \
+u32 \
+match \
+ip \
+dport \
+68 \
+ffff \
+action \
+csum \
+ip \
+and \
+udp
 nft \
 -ae insert \
 rule \
index a7f09cda59e0385acb48b2238809d3f8ffe3b38a..2811e098d1d74c8af6febe69e0d3c95f472051d1 100644 (file)
@@ -1,3 +1,43 @@
+tc \
+qdisc \
+show \
+dev \
+virbr0 \
+handle \
+1:
+tc \
+qdisc \
+add \
+dev \
+virbr0 \
+root \
+handle \
+1: \
+htb \
+default \
+2
+tc \
+filter \
+add \
+dev \
+virbr0 \
+prio \
+2 \
+protocol \
+ip \
+parent \
+1: \
+u32 \
+match \
+ip \
+dport \
+68 \
+ffff \
+action \
+csum \
+ip \
+and \
+udp
 nft \
 -ae insert \
 rule \
index b826fe6134d64dd0e01f19661787e73ad5e52893..5409d5b552f2fd77bf8a3abc9cdce4e277b3e84b 100644 (file)
@@ -1,3 +1,43 @@
+tc \
+qdisc \
+show \
+dev \
+virbr0 \
+handle \
+1:
+tc \
+qdisc \
+add \
+dev \
+virbr0 \
+root \
+handle \
+1: \
+htb \
+default \
+2
+tc \
+filter \
+add \
+dev \
+virbr0 \
+prio \
+2 \
+protocol \
+ip \
+parent \
+1: \
+u32 \
+match \
+ip \
+dport \
+68 \
+ffff \
+action \
+csum \
+ip \
+and \
+udp
 nft \
 -ae insert \
 rule \
index d8a9ba706d8b92b1ef4b9c2e36aa8c8fbba88760..3fd6b94eef9b66ceec894f2fbacd2485204caad6 100644 (file)
@@ -1,3 +1,43 @@
+tc \
+qdisc \
+show \
+dev \
+virbr0 \
+handle \
+1:
+tc \
+qdisc \
+add \
+dev \
+virbr0 \
+root \
+handle \
+1: \
+htb \
+default \
+2
+tc \
+filter \
+add \
+dev \
+virbr0 \
+prio \
+2 \
+protocol \
+ip \
+parent \
+1: \
+u32 \
+match \
+ip \
+dport \
+68 \
+ffff \
+action \
+csum \
+ip \
+and \
+udp
 nft \
 -ae insert \
 rule \
index ceaed6fa409a212073a740316724d7637c5591d0..d74417cdb3a00e3876818df6a2f352d62103f417 100644 (file)
@@ -1,3 +1,43 @@
+tc \
+qdisc \
+show \
+dev \
+virbr0 \
+handle \
+1:
+tc \
+qdisc \
+add \
+dev \
+virbr0 \
+root \
+handle \
+1: \
+htb \
+default \
+2
+tc \
+filter \
+add \
+dev \
+virbr0 \
+prio \
+2 \
+protocol \
+ip \
+parent \
+1: \
+u32 \
+match \
+ip \
+dport \
+68 \
+ffff \
+action \
+csum \
+ip \
+and \
+udp
 nft \
 -ae insert \
 rule \
index 1dc37a26ec872d4bc78d850aa1bd9d7f43974799..b55bb287a9a271789e8862744521e3dba0f6d675 100644 (file)
@@ -1,3 +1,43 @@
+tc \
+qdisc \
+show \
+dev \
+virbr0 \
+handle \
+1:
+tc \
+qdisc \
+add \
+dev \
+virbr0 \
+root \
+handle \
+1: \
+htb \
+default \
+2
+tc \
+filter \
+add \
+dev \
+virbr0 \
+prio \
+2 \
+protocol \
+ip \
+parent \
+1: \
+u32 \
+match \
+ip \
+dport \
+68 \
+ffff \
+action \
+csum \
+ip \
+and \
+udp
 nft \
 -ae insert \
 rule \
index 28508292f9efe44100077972853ba67fa0d7001e..08623c138189aff57f0f3e921c7976cc4ff0fdde 100644 (file)
@@ -1,3 +1,43 @@
+tc \
+qdisc \
+show \
+dev \
+virbr0 \
+handle \
+1:
+tc \
+qdisc \
+add \
+dev \
+virbr0 \
+root \
+handle \
+1: \
+htb \
+default \
+2
+tc \
+filter \
+add \
+dev \
+virbr0 \
+prio \
+2 \
+protocol \
+ip \
+parent \
+1: \
+u32 \
+match \
+ip \
+dport \
+68 \
+ffff \
+action \
+csum \
+ip \
+and \
+udp
 nft \
 -ae insert \
 rule \
index 282c9542a54d404a18434362b3b3f027e07b12ff..76d69025179a2ec08f1e1ebf1c2cf3f38787fd0a 100644 (file)
@@ -1,3 +1,43 @@
+tc \
+qdisc \
+show \
+dev \
+virbr0 \
+handle \
+1:
+tc \
+qdisc \
+add \
+dev \
+virbr0 \
+root \
+handle \
+1: \
+htb \
+default \
+2
+tc \
+filter \
+add \
+dev \
+virbr0 \
+prio \
+2 \
+protocol \
+ip \
+parent \
+1: \
+u32 \
+match \
+ip \
+dport \
+68 \
+ffff \
+action \
+csum \
+ip \
+and \
+udp
 nft \
 -ae insert \
 rule \