]> git.ipfire.org Git - thirdparty/linux.git/commitdiff
vxlan: Support MC routing in the underlay
author: Petr Machata <petrm@nvidia.com>
Mon, 16 Jun 2025 22:44:19 +0000 (00:44 +0200)
committer: Jakub Kicinski <kuba@kernel.org>
Wed, 18 Jun 2025 01:18:46 +0000 (18:18 -0700)
Locally-generated MC packets have so far not been subject to MC routing.
Instead, an MC-enabled installation would maintain the MC routing tables
and, separately from that, the list of interfaces to send packets to as
part of the VXLAN FDB and MDB.

In a previous patch, the ip_mr_output() and ip6_mr_output() routines were
added for IPv4 and IPv6. All locally generated MC traffic is now passed
through these functions. For reasons of backward compatibility, an SKB
(IPCB / IP6CB) flag guards the actual MC routing.

This patch adds logic to set the flag, and the UAPI to enable the behavior.

Signed-off-by: Petr Machata <petrm@nvidia.com>
Reviewed-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: Nikolay Aleksandrov <razor@blackwall.org>
Link: https://patch.msgid.link/d899655bb7e9b2521ee8c793e67056b9fd02ba12.1750113335.git.petrm@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
drivers/net/vxlan/vxlan_core.c
include/net/vxlan.h
include/uapi/linux/if_link.h

index b22f9866be8eb38c57cfec6c8707b6b1c0d8a193..a6cc1de4d8b85ba9a15f989cc8f7fab56ebad47e 100644 (file)
@@ -2451,6 +2451,7 @@ void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
        rcu_read_lock();
        if (addr_family == AF_INET) {
                struct vxlan_sock *sock4 = rcu_dereference(vxlan->vn4_sock);
+               u16 ipcb_flags = 0;
                struct rtable *rt;
                __be16 df = 0;
                __be32 saddr;
@@ -2467,6 +2468,9 @@ void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
                        goto tx_error;
                }
 
+               if (flags & VXLAN_F_MC_ROUTE)
+                       ipcb_flags |= IPSKB_MCROUTE;
+
                if (!info) {
                        /* Bypass encapsulation if the destination is local */
                        err = encap_bypass_if_local(skb, dev, vxlan, AF_INET,
@@ -2522,11 +2526,13 @@ void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
 
                udp_tunnel_xmit_skb(rt, sock4->sock->sk, skb, saddr,
                                    pkey->u.ipv4.dst, tos, ttl, df,
-                                   src_port, dst_port, xnet, !udp_sum, 0);
+                                   src_port, dst_port, xnet, !udp_sum,
+                                   ipcb_flags);
 #if IS_ENABLED(CONFIG_IPV6)
        } else {
                struct vxlan_sock *sock6 = rcu_dereference(vxlan->vn6_sock);
                struct in6_addr saddr;
+               u16 ip6cb_flags = 0;
 
                if (!ifindex)
                        ifindex = sock6->sock->sk->sk_bound_dev_if;
@@ -2542,6 +2548,9 @@ void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
                        goto tx_error;
                }
 
+               if (flags & VXLAN_F_MC_ROUTE)
+                       ip6cb_flags |= IP6SKB_MCROUTE;
+
                if (!info) {
                        u32 rt6i_flags = dst_rt6_info(ndst)->rt6i_flags;
 
@@ -2587,7 +2596,7 @@ void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
                udp_tunnel6_xmit_skb(ndst, sock6->sock->sk, skb, dev,
                                     &saddr, &pkey->u.ipv6.dst, tos, ttl,
                                     pkey->label, src_port, dst_port, !udp_sum,
-                                    0);
+                                    ip6cb_flags);
 #endif
        }
        vxlan_vnifilter_count(vxlan, vni, NULL, VXLAN_VNI_STATS_TX, pkt_len);
@@ -3402,6 +3411,7 @@ static const struct nla_policy vxlan_policy[IFLA_VXLAN_MAX + 1] = {
        [IFLA_VXLAN_LOCALBYPASS]        = NLA_POLICY_MAX(NLA_U8, 1),
        [IFLA_VXLAN_LABEL_POLICY]       = NLA_POLICY_MAX(NLA_U32, VXLAN_LABEL_MAX),
        [IFLA_VXLAN_RESERVED_BITS] = NLA_POLICY_EXACT_LEN(sizeof(struct vxlanhdr)),
+       [IFLA_VXLAN_MC_ROUTE]           = NLA_POLICY_MAX(NLA_U8, 1),
 };
 
 static int vxlan_validate(struct nlattr *tb[], struct nlattr *data[],
@@ -4315,6 +4325,14 @@ static int vxlan_nl2conf(struct nlattr *tb[], struct nlattr *data[],
                        return err;
        }
 
+       if (data[IFLA_VXLAN_MC_ROUTE]) {
+               err = vxlan_nl2flag(conf, data, IFLA_VXLAN_MC_ROUTE,
+                                   VXLAN_F_MC_ROUTE, changelink,
+                                   true, extack);
+               if (err)
+                       return err;
+       }
+
        if (tb[IFLA_MTU]) {
                if (changelink) {
                        NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_MTU],
index e2f7ca045d3e54d7310f130d8d60c77ff97d677b..0ee50785f4f1c26894f05b5e1ae52aad781bd062 100644 (file)
@@ -332,6 +332,7 @@ struct vxlan_dev {
 #define VXLAN_F_VNIFILTER               0x20000
 #define VXLAN_F_MDB                    0x40000
 #define VXLAN_F_LOCALBYPASS            0x80000
+#define VXLAN_F_MC_ROUTE               0x100000
 
 /* Flags that are used in the receive path. These flags must match in
  * order for a socket to be shareable
@@ -353,7 +354,9 @@ struct vxlan_dev {
                                         VXLAN_F_UDP_ZERO_CSUM6_RX |    \
                                         VXLAN_F_COLLECT_METADATA  |    \
                                         VXLAN_F_VNIFILTER         |    \
-                                        VXLAN_F_LOCALBYPASS)
+                                        VXLAN_F_LOCALBYPASS       |    \
+                                        VXLAN_F_MC_ROUTE          |    \
+                                        0)
 
 struct net_device *vxlan_dev_create(struct net *net, const char *name,
                                    u8 name_assign_type, struct vxlan_config *conf);
index 3ad2d5d9803479a10a6b2cfab2df98ce0f823926..873c285996febe62428a88cd269971458748ac04 100644 (file)
@@ -1398,6 +1398,7 @@ enum {
        IFLA_VXLAN_LOCALBYPASS,
        IFLA_VXLAN_LABEL_POLICY, /* IPv6 flow label policy; ifla_vxlan_label_policy */
        IFLA_VXLAN_RESERVED_BITS,
+       IFLA_VXLAN_MC_ROUTE,
        __IFLA_VXLAN_MAX
 };
 #define IFLA_VXLAN_MAX (__IFLA_VXLAN_MAX - 1)