From: Petr Machata Date: Mon, 16 Jun 2025 22:44:19 +0000 (+0200) Subject: vxlan: Support MC routing in the underlay X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=f8337efa4ff5a27e6c1d4e384166413eecd21a65;p=thirdparty%2Flinux.git vxlan: Support MC routing in the underlay Locally-generated MC packets have so far not been subject to MC routing. Instead an MC-enabled installation would maintain the MC routing tables, and separately from that the list of interfaces to send packets to as part of the VXLAN FDB and MDB. In a previous patch, the ip_mr_output() and ip6_mr_output() routines were added for IPv4 and IPv6. All locally generated MC traffic is now passed through these functions. For reasons of backward compatibility, an SKB (IPCB / IP6CB) flag guards the actual MC routing. This patch adds logic to set the flag, and the UAPI to enable the behavior. Signed-off-by: Petr Machata Reviewed-by: Ido Schimmel Reviewed-by: Nikolay Aleksandrov Link: https://patch.msgid.link/d899655bb7e9b2521ee8c793e67056b9fd02ba12.1750113335.git.petrm@nvidia.com Signed-off-by: Jakub Kicinski --- diff --git a/drivers/net/vxlan/vxlan_core.c b/drivers/net/vxlan/vxlan_core.c index b22f9866be8eb..a6cc1de4d8b85 100644 --- a/drivers/net/vxlan/vxlan_core.c +++ b/drivers/net/vxlan/vxlan_core.c @@ -2451,6 +2451,7 @@ void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, rcu_read_lock(); if (addr_family == AF_INET) { struct vxlan_sock *sock4 = rcu_dereference(vxlan->vn4_sock); + u16 ipcb_flags = 0; struct rtable *rt; __be16 df = 0; __be32 saddr; @@ -2467,6 +2468,9 @@ void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, goto tx_error; } + if (flags & VXLAN_F_MC_ROUTE) + ipcb_flags |= IPSKB_MCROUTE; + if (!info) { /* Bypass encapsulation if the destination is local */ err = encap_bypass_if_local(skb, dev, vxlan, AF_INET, @@ -2522,11 +2526,13 @@ void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, udp_tunnel_xmit_skb(rt, sock4->sock->sk, skb, 
saddr, pkey->u.ipv4.dst, tos, ttl, df, - src_port, dst_port, xnet, !udp_sum, 0); + src_port, dst_port, xnet, !udp_sum, + ipcb_flags); #if IS_ENABLED(CONFIG_IPV6) } else { struct vxlan_sock *sock6 = rcu_dereference(vxlan->vn6_sock); struct in6_addr saddr; + u16 ip6cb_flags = 0; if (!ifindex) ifindex = sock6->sock->sk->sk_bound_dev_if; @@ -2542,6 +2548,9 @@ void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, goto tx_error; } + if (flags & VXLAN_F_MC_ROUTE) + ip6cb_flags |= IP6SKB_MCROUTE; + if (!info) { u32 rt6i_flags = dst_rt6_info(ndst)->rt6i_flags; @@ -2587,7 +2596,7 @@ void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, udp_tunnel6_xmit_skb(ndst, sock6->sock->sk, skb, dev, &saddr, &pkey->u.ipv6.dst, tos, ttl, pkey->label, src_port, dst_port, !udp_sum, - 0); + ip6cb_flags); #endif } vxlan_vnifilter_count(vxlan, vni, NULL, VXLAN_VNI_STATS_TX, pkt_len); @@ -3402,6 +3411,7 @@ static const struct nla_policy vxlan_policy[IFLA_VXLAN_MAX + 1] = { [IFLA_VXLAN_LOCALBYPASS] = NLA_POLICY_MAX(NLA_U8, 1), [IFLA_VXLAN_LABEL_POLICY] = NLA_POLICY_MAX(NLA_U32, VXLAN_LABEL_MAX), [IFLA_VXLAN_RESERVED_BITS] = NLA_POLICY_EXACT_LEN(sizeof(struct vxlanhdr)), + [IFLA_VXLAN_MC_ROUTE] = NLA_POLICY_MAX(NLA_U8, 1), }; static int vxlan_validate(struct nlattr *tb[], struct nlattr *data[], @@ -4315,6 +4325,14 @@ static int vxlan_nl2conf(struct nlattr *tb[], struct nlattr *data[], return err; } + if (data[IFLA_VXLAN_MC_ROUTE]) { + err = vxlan_nl2flag(conf, data, IFLA_VXLAN_MC_ROUTE, + VXLAN_F_MC_ROUTE, changelink, + true, extack); + if (err) + return err; + } + if (tb[IFLA_MTU]) { if (changelink) { NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_MTU], diff --git a/include/net/vxlan.h b/include/net/vxlan.h index e2f7ca045d3e5..0ee50785f4f1c 100644 --- a/include/net/vxlan.h +++ b/include/net/vxlan.h @@ -332,6 +332,7 @@ struct vxlan_dev { #define VXLAN_F_VNIFILTER 0x20000 #define VXLAN_F_MDB 0x40000 #define VXLAN_F_LOCALBYPASS 0x80000 +#define VXLAN_F_MC_ROUTE 0x100000 /* Flags that 
are used in the receive path. These flags must match in * order for a socket to be shareable @@ -353,7 +354,9 @@ struct vxlan_dev { VXLAN_F_UDP_ZERO_CSUM6_RX | \ VXLAN_F_COLLECT_METADATA | \ VXLAN_F_VNIFILTER | \ - VXLAN_F_LOCALBYPASS) + VXLAN_F_LOCALBYPASS | \ + VXLAN_F_MC_ROUTE | \ + 0) struct net_device *vxlan_dev_create(struct net *net, const char *name, u8 name_assign_type, struct vxlan_config *conf); diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 3ad2d5d980347..873c285996feb 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -1398,6 +1398,7 @@ enum { IFLA_VXLAN_LOCALBYPASS, IFLA_VXLAN_LABEL_POLICY, /* IPv6 flow label policy; ifla_vxlan_label_policy */ IFLA_VXLAN_RESERVED_BITS, + IFLA_VXLAN_MC_ROUTE, __IFLA_VXLAN_MAX }; #define IFLA_VXLAN_MAX (__IFLA_VXLAN_MAX - 1)