--- /dev/null
- * BIRD -- Multicast routing kernel
+ /*
- * (c) 2016 Ondrej Hlavaty <aearsis@eideo.cz>
++ * BIRD -- UNIX Kernel Multicast Routing
+ *
- * Can be freely distributed and used under the terms of the GNU GPL.
++ * (c) 2016 Ondrej Hlavaty <aearsis@eideo.cz>
++ * (c) 2018 Ondrej Zajicek <santiago@crfreenet.org>
++ * (c) 2018 CZ.NIC z.s.p.o.
+ *
-/*
- * DOC: Multicast route kernel synchronization
++ * Can be freely distributed and used under the terms of the GNU GPL.
+ */
+
- * This protocol is the BIRD's interface to the kernel part of multicast
- * routing. It assignes the VIF indices to interfaces, forwards the IGMP
- * packets to SK_IGMP sockets and, of course, adds MFC entries to the kernel.
++/**
++ * DOC: Kernel Multicast Routing
++ *
++ * This protocol is the interface to the kernel part of multicast routing. It
++ * handles registration of multicast interfaces (MIFs), maintenance of kernel
++ * Multicast Forwarding Cache (MFC), and reception of incoming IGMP packets.
+ *
- * Multicast in current kernel is a bit tricky. There must be exactly one
- * socket on which setsockopt MRT_INIT is called, then multicast forwarding is
- * enabled. Every multicast routing table update must be done through this
- * protocol.
++ * Multicast forwarding in Linux and BSD kernels is a bit tricky. There must be
++ * exactly one socket on which setsockopt MRT_INIT is called, then multicast
++ * forwarding is enabled and kernel multicast routing table is maintained until
++ * the socket is closed. This MRT control socket is stored in &mrt_sock field.
+ *
- * Also, that socket is the only one that receives IGMP packets on non-joined
- * groups. These packets IGMP protocol needs to receive, so we forward them
- * internally. To simulate sane behavior, protocol can open socket with type
- * SK_IGMP, which is almost as a SK_IP, IPPROTO_IGMP socket but receives copy
- * of all packets.
++ * Multicast forwarding works only on interfaces registered as MIFs, with
++ * assigned MIF index. While MIFs and MIF indexes are handled by OS-independent
++ * code in iface.c, actual MIF registration by OS kernel is handled here. The
++ * MKernel protocol is associated with a MIF group by mkrt_register_mif_group(),
++ * after that it receives mkrt_register_mif() / mkrt_unregister_mif() calls for
++ * changes in given MIF group.
+ *
- * As always with system-dependent code, prepare for everything. Because the
- * BSD kernel knows nothing apart from (S,G) routes, and Linux even blocked the
- * (*,G) routes for something not being a regular (*,G) routes, we must add the
- * routes in reaction to missed packets. This is very bad, but probably the
- * only solution, until someone rewrites the kernel part.
++ * Unlike kernel unicast routing API, which is proactive, kernel multicast
++ * routing API is designed as reactive. Kernel keeps MFC entries for encountered
++ * (S, G) flows and when a new flow is noticed, BIRD receives cache miss message
++ * (%IGMPMSG_NOCACHE) from kernel and responds with adding appropriate (S, G)
++ * MFC entry to the kernel, see mkrt_resolve_mfc(). Therefore, regular route
++ * notifications handled by mkrt_rt_notify() are not directly translated to
++ * kernel route updates.
+ *
- * Part of the protocol is global and "static", without the need to configure.
- * Another part is a usual proto instance.
++ * There is also support for (*, G) MFC entries in Linux (using
++ * %MRT_ADD_MFC_PROXY), but their behavior is strange and does not match our
++ * needs, and there is no equivalent in BSD. Therefore, we do not use them and
++ * manage with traditional (S, G) MFC entries.
+ *
-#include "sysdep/unix/unix.h"
-#include "sysdep/unix/mkrt.h"
-
-#define HASH_I_KEY(n) n->iface->index
-#define HASH_I_NEXT(n) n->next
-#define HASH_I_EQ(a,b) (a == b)
-#define HASH_I_FN(n) n
++ * Finally, the MRT control socket is the only one that receives all IGMP
++ * packets, even those from non-joined groups. IGMP protocol needs to receive
++ * these packets, so we forward them internally. To simulate sane behavior,
++ * a protocol can open an IGMP socket and use sk_setup_igmp() to register it for
++ * reception of all IGMP packets. The socket is relinked to internal MIF socket
++ * list. MKernel protocol then uses mif_forward_igmp() to forward packets
++ * received on the MRT control socket to all sockets on these lists.
+ */
+
+ #include "nest/bird.h"
+ #include "nest/iface.h"
+ #include "lib/socket.h"
-#define HASH_MFC_KEY(n) n->ga
-#define HASH_MFC_NEXT(n) n->next
-#define HASH_MFC_EQ(a,b) ipa_equal(a, b)
-#define HASH_MFC_FN(k) ipa_hash(k)
+
-static struct mkrt_config *mkrt_cf;
-
-/* Global code for SK_IGMP sockets */
++#include "unix.h"
++#include "mkrt.h"
+
-struct mkrt_iface {
- struct mkrt_iface *next;
- struct iface *iface;
- list sockets;
-};
++#include <linux/mroute.h>
+
-static struct mkrt_global {
- pool *pool;
- list sockets;
- HASH(struct mkrt_iface) ifaces;
-} mkrt_global;
+
-void
-mkrt_io_init(void)
++/*
++ * MRT socket options
++ */
+
- mkrt_global.pool = rp_new(&root_pool, "Multicast kernel Syncer");
- HASH_INIT(mkrt_global.ifaces, mkrt_global.pool, 6);
- init_list(&mkrt_global.sockets);
++static inline int
++sk_mrt_init4(sock *s)
+ {
-static struct mkrt_iface *
-mkrt_iface_find(struct mkrt_proto *p, unsigned ifindex)
++ int y = 1;
++ return setsockopt(s->fd, IPPROTO_IP, MRT_INIT, &y, sizeof(y));
+ }
+
- return HASH_FIND(mkrt_global.ifaces, HASH_I, ifindex);
++static inline int
++sk_mrt_done4(sock *s)
+ {
-static struct mkrt_iface *
-mkrt_iface_get(unsigned ifindex)
++ return setsockopt(s->fd, IPPROTO_IP, MRT_DONE, NULL, 0);
+ }
+
- struct mkrt_iface *ifa = HASH_FIND(mkrt_global.ifaces, HASH_I, ifindex);
- if (ifa)
- return ifa;
-
- ifa = mb_allocz(mkrt_global.pool, sizeof(struct mkrt_iface));
- init_list(&ifa->sockets);
- ifa->iface = if_find_by_index(ifindex);
++static inline int
++sk_mrt_add_mif4(sock *s, struct mif *mif)
+ {
- HASH_INSERT(mkrt_global.ifaces, HASH_I, ifa);
- return ifa;
++ struct vifctl vc = {
++ .vifc_vifi = mif->index,
++ .vifc_flags = VIFF_USE_IFINDEX,
++ .vifc_lcl_ifindex = mif->iface->index,
++ };
+
-/*
- * Add the socket into the list of sockets that are passed a copy of every IGMP
- * packet received on the control socket.
- */
-void
-mkrt_listen(sock *s)
++ return setsockopt(s->fd, IPPROTO_IP, MRT_ADD_VIF, &vc, sizeof(vc));
+ }
+
- ASSERT(s->type == SK_IGMP);
++static inline int
++sk_mrt_del_mif4(sock *s, struct mif *mif)
+ {
- if (s->iface)
- {
- struct mkrt_iface *i = mkrt_iface_get(s->iface->index);
- add_tail(&i->sockets, &s->n);
- }
- else
- add_tail(&mkrt_global.sockets, &s->n);
-
- log(L_INFO "Socket fd %i getting IGMP", s->fd);
++ struct vifctl vc = {
++ .vifc_vifi = mif->index,
++ };
+
-/*
- * Forward a packet from one socket to another. Emulates the receiving routine.
- * Socket is in exactly the same state as if it received the packet itself, but
- * must not modify it to preserve it for others.
- */
-static inline void
-mkrt_rx_forward(sock *from, sock *to, int len)
++ return setsockopt(s->fd, IPPROTO_IP, MRT_DEL_VIF, &vc, sizeof(vc));
+ }
+
- if (!to->rx_hook)
- return;
-
- to->faddr = from->faddr;
- if (to->flags & SKF_LADDR_RX)
- {
- to->laddr = from->laddr;
- to->lifindex = from->lifindex;
- }
-
- to->rbuf = from->rbuf;
- to->rpos = from->rpos;
- to->rbsize = from->rbsize;
-
- to->rx_hook(to, len);
++static inline int
++sk_mrt_add_mfc4(sock *s, ip4_addr src, ip4_addr grp, u32 iifs, u32 oifs, int mif_index)
+ {
- to->faddr = to->laddr = IPA_NONE;
- to->lifindex = 0;
- to->rbuf = to->rpos = NULL;
- to->rbsize = 0;
++ struct mfcctl mc = {
++ .mfcc_origin = ip4_to_in4(src),
++ .mfcc_mcastgrp = ip4_to_in4(grp),
++ .mfcc_parent = mif_index,
++ };
++
++ if (BIT32_TEST(&iifs, mif_index) && oifs)
++ for (int i = 0; i < MIFS_MAX; i++)
++ if (BIT32_TEST(&oifs, i) && (i != mif_index))
++ mc.mfcc_ttls[i] = 1;
+
-/*
- * Forward a packet to all sockets on a list.
- */
-static inline void
-mkrt_rx_forward_all(list *sockets, sock *sk, int len)
++ return setsockopt(s->fd, IPPROTO_IP, MRT_ADD_MFC, &mc, sizeof(mc));
+ }
+
- node *n, *next;
++static inline int
++sk_mrt_del_mfc4(sock *s, ip4_addr src, ip4_addr grp)
+ {
- WALK_LIST_DELSAFE(n, next, *sockets)
- mkrt_rx_forward(sk, SKIP_BACK(sock, n, n), len);
++ struct mfcctl mc = {
++ .mfcc_origin = ip4_to_in4(src),
++ .mfcc_mcastgrp = ip4_to_in4(grp),
++ };
+
-/***************
- Mkernel proto
- ***************/
++ return setsockopt(s->fd, IPPROTO_IP, MRT_DEL_MFC, &mc, sizeof(mc));
+ }
+
- * Call a setsockopt with a MRT_ option.
+
+ /*
-static inline int
-mkrt_call(struct mkrt_proto *mkrt, int option_name, const void *val, socklen_t len)
-{
- return setsockopt(mkrt->igmp_sock->fd, IPPROTO_IP, option_name, val, len);
-}
++ * MIF handling
+ */
-static inline vifi_t
-mkrt_alloc_vifi(struct mkrt_proto *p, struct iface *iface)
+
- if (p->vif_count >= MAXVIFS)
- {
- log(L_ERR "Maximum number of interfaces for multicast routing reached.");
- return -1;
- }
-
- for (vifi_t i = 0; ; i = (i + 1) % MAXVIFS)
- if (p->vif_map[i] == NULL)
- {
- p->vif_map[i] = iface;
- iface->vifi = i;
- p->vif_count++;
- return i;
- }
-}
++void
++mkrt_register_mif(struct mkrt_proto *p, struct mif *mif)
+ {
-static inline void
-mkrt_free_vifi(struct mkrt_proto *p, vifi_t vifi)
-{
- p->vif_count -= p->vif_map[vifi] != NULL;
- p->vif_map[vifi] = NULL;
++ TRACE(D_EVENTS, "Registering interface %s MIF %i", mif->iface->name, mif->index);
+
-static void
-mkrt_add_vif(struct mkrt_proto *p, struct iface *i)
++ if (sk_mrt_add_mif4(p->mrt_sock, mif) < 0)
++ log(L_ERR "%s: Cannot register interface %s MIF %i: %m",
++ p->p.name, mif->iface->name, mif->index);
+ }
+
- int err;
-
- if (i->flags & IF_VIFI_ASSIGNED)
- return;
-
- mkrt_alloc_vifi(p, i);
-
- struct vifctl vc = {0};
- vc.vifc_vifi = i->vifi;
- vc.vifc_flags = VIFF_USE_IFINDEX;
- vc.vifc_lcl_ifindex = i->index;
-
- if ((err = mkrt_call(p, MRT_ADD_VIF, &vc, sizeof(vc))) < 0)
- goto err;
-
- TRACE(D_EVENTS, "Iface %s (%i) assigned VIF %i", i->name, i->index, i->vifi);
-
- i->flags |= IF_VIFI_ASSIGNED;
- return;
++void
++mkrt_unregister_mif(struct mkrt_proto *p, struct mif *mif)
+ {
-err:
- log(L_ERR "Error while assigning %s VIF %i: %m", i->name, i->vifi, err);
- mkrt_free_vifi(p, i->vifi);
++ TRACE(D_EVENTS, "Unregistering interface %s MIF %i", mif->iface->name, mif->index);
+
-static int
-mkrt_del_vif(struct mkrt_proto *p, struct iface *i)
++ if (sk_mrt_del_mif4(p->mrt_sock, mif) < 0)
++ log(L_ERR "%s: Cannot unregister interface %s MIF %i: %m",
++ p->p.name, mif->iface->name, mif->index);
+ }
+
- int err;
-
- if (!i->flags & IF_VIFI_ASSIGNED)
- return 0;
-
- struct vifctl vc = {0};
- vc.vifc_vifi = i->vifi;
-
- if ((err = mkrt_call(p, MRT_DEL_VIF, &vc, sizeof(vc))) < 0)
- goto err;
++void
++mkrt_register_mif_group(struct mkrt_proto *p, struct mif_group *grp)
+ {
- mkrt_free_vifi(p, i->vifi);
- i->flags &= ~IF_VIFI_ASSIGNED;
- return 0;
-err:
- log(L_ERR "Error while unassigning %s VIF %i: %m", i->name, i->vifi, err);
- return err;
++ ASSERT(!grp->owner);
++ grp->owner = &p->p;
+
-static struct mkrt_mfc_group *
-mkrt_mfc_get(struct mkrt_proto *p, ip_addr ga)
++ WALK_ARRAY(grp->mifs, MIFS_MAX, mif)
++ if (mif)
++ mkrt_register_mif(p, mif);
+ }
+
- struct mkrt_mfc_group *mg = HASH_FIND(p->mfc_groups, HASH_MFC, ga);
- if (mg)
- return mg;
-
- mg = mb_allocz(p->p.pool, sizeof(struct mkrt_mfc_group));
- mg->ga = ga;
- init_list(&mg->sources);
- HASH_INSERT(p->mfc_groups, HASH_MFC, mg);
- return mg;
++void
++mkrt_unregister_mif_group(struct mkrt_proto *p, struct mif_group *grp)
+ {
- * Add a MFC entry for (S, G) with parent vifi, according to the route.
++ grp->owner = NULL;
++
++ WALK_ARRAY(grp->mifs, MIFS_MAX, mif)
++ if (mif)
++ mkrt_unregister_mif(p, mif);
+ }
+
++
+ /*
-static int
-mkrt_mfc_update(struct mkrt_proto *p, ip_addr group, ip_addr source, int vifi, struct rte *rte)
++ * MFC handling
+ */
- struct mfcctl mc = {0};
- int err;
++
++static void
++mkrt_init_mfc(void *G)
+ {
- mc.mfcc_origin = ipa_to_in4(source);
- mc.mfcc_mcastgrp = ipa_to_in4(group);
- mc.mfcc_parent = vifi;
++ struct mkrt_mfc_group *grp = G;
+
- if (rte && RTE_MGRP_ISSET(p->vif_map[vifi], rte->u.mkrt.iifs))
- for (int i = 0; i < MAXVIFS; i++)
- if (RTE_MGRP_ISSET(p->vif_map[i], rte->u.mkrt.oifs))
- mc.mfcc_ttls[i] = 1;
++ init_list(&grp->sources);
++}
+
- TRACE(D_EVENTS, "%s MFC entry for (%I, %I)", (vifi > 0) ? "Add" : "Delete", source, group);
- if ((err = mkrt_call(p, (vifi > 0) ? MRT_ADD_MFC : MRT_DEL_MFC, &mc, sizeof(mc)) < 0))
- log(L_WARN "Mkernel: failed to %s MFC entry: %m", (vifi > 0) ? "add" : "delete", err);
++static struct mkrt_mfc_source *
++mkrt_get_mfc(struct mkrt_proto *p, ip4_addr source, ip4_addr group)
++{
++ net_addr_mgrp4 n = NET_ADDR_MGRP4(group);
++ struct mkrt_mfc_group *grp = fib_get(&p->mfc_groups, (net_addr *) &n);
+
- return err;
++ struct mkrt_mfc_source *src;
++ WALK_LIST(src, grp->sources)
++ if (ip4_equal(src->addr, source))
++ return src;
+
-struct mfc_request {
- ip_addr *group, *source;
- vifi_t vifi;
++ src = mb_allocz(p->p.pool, sizeof(struct mkrt_mfc_source));
++ src->addr = source;
++ src->parent = -1;
++ add_tail(&grp->sources, NODE src);
++
++ return src;
+ }
+
-/*
- * Expand the attributes from the struct mfc_request and call mkrt_mfc_update,
- * and pass back the result.
- */
++struct mfc_result {
+ u32 iifs, oifs;
+ };
+
-mkrt_mfc_call_update(struct proto *p, void *data, rte *rte)
+ static void
- struct mfc_request *req = data;
- mkrt_mfc_update((struct mkrt_proto *) p, *req->group, *req->source, req->vifi, rte);
- if (rte)
- {
- req->iifs = rte->u.mkrt.iifs;
- req->oifs = rte->u.mkrt.oifs;
- }
++mkrt_resolve_mfc_hook(struct proto *p UNUSED, void *data, rte *rte)
+ {
-mkrt_mfc_resolve(struct mkrt_proto *p, ip_addr group, ip_addr source, vifi_t vifi)
++ struct mfc_result *res = data;
++ res->iifs = rta_iifs(rte->attrs);
++ res->oifs = rta_oifs(rte->attrs);
+ }
+
+ /*
+ * Resolve the MFC miss by adding a MFC entry. If no matching entry in the
+ * routing table exists, add an empty one to satisfy the kernel.
+ */
+ static void
- struct iface *iface = p->vif_map[vifi];
++mkrt_resolve_mfc(struct mkrt_proto *p, ip4_addr src, ip4_addr grp, int mif_index)
+ {
- TRACE(D_EVENTS, "MFC miss for (%I, %I, %s)", source, group, iface ? iface->name : "??");
++ struct mif *mif = (mif_index < MIFS_MAX) ? p->mif_group->mifs[mif_index] : NULL;
+
- net_addr_mgrp4 n0 = NET_ADDR_MGRP4(ipa_to_ip4(group));
++ TRACE(D_EVENTS, "MFC miss for (%I4, %I4, %s)", src, grp, mif ? mif->iface->name : "?");
+
- struct mfc_request req = { &group, &source, vifi };
- if (!rt_route(p->p.main_channel, (net_addr *) &n0, mkrt_mfc_call_update, &req))
- mkrt_mfc_call_update((struct proto *) p, &req, NULL);
++ net_addr_mgrp4 n0 = NET_ADDR_MGRP4(grp);
++ struct mfc_result res = {};
+
- struct mkrt_mfc_group *grp = mkrt_mfc_get(p, group);
- struct mkrt_mfc_source *src = mb_alloc(p->p.pool, sizeof(struct mkrt_mfc_source));
- src->addr = source;
- src->vifi = vifi;
- src->iifs = req.iifs;
- src->oifs = req.oifs;
- add_tail(&grp->sources, NODE src);
++ rt_examine(p->p.main_channel, (net_addr *) &n0, mkrt_resolve_mfc_hook, &res);
+
- * entries are now wrong. Instead of correcting them, flush the cache.
++ struct mkrt_mfc_source *mfc = mkrt_get_mfc(p, src, grp);
++ mfc->iifs = res.iifs;
++ mfc->oifs = res.oifs;
++ mfc->parent = mif_index;
++
++ TRACE(D_EVENTS, "Adding MFC entry for (%I4, %I4)", src, grp);
++
++ if (sk_mrt_add_mfc4(p->mrt_sock, src, grp, mfc->iifs, mfc->oifs, mfc->parent) < 0)
++ log(L_ERR "%s: Failed to add MFC entry: %m", p->p.name);
+ }
+
++static void
++mkrt_remove_mfc(struct mkrt_proto *p, struct mkrt_mfc_source *src, ip4_addr grp)
++{
++ TRACE(D_EVENTS, "Removing MFC entry for (%I4, %I4)", src->addr, grp);
++
++ if (sk_mrt_del_mfc4(p->mrt_sock, src->addr, grp) < 0)
++ log(L_ERR "%s: Failed to remove MFC entry: %m", p->p.name);
++
++ rem_node(NODE src);
++ mb_free(src);
++}
++
++
+ /*
+ * Because a route in the internal table has changed, all the corresponding MFC
-mkrt_mfc_clean(struct mkrt_proto *p, struct mkrt_mfc_group *mg)
++ * entries are now wrong. Instead of correcting them, just flush the cache.
+ */
+ static void
- struct mkrt_mfc_source *n, *next;
- WALK_LIST_DELSAFE(n, next, mg->sources)
- {
- mkrt_mfc_update(p, mg->ga, n->addr, -1, NULL);
- rem_node(NODE n);
- mb_free(n);
- }
++mkrt_reset_mfc_group(struct mkrt_proto *p, struct mkrt_mfc_group *grp)
+ {
-mkrt_mfc_free(struct mkrt_proto *p, struct mkrt_mfc_group *mg)
++ ip4_addr group = net4_prefix(grp->n.addr);
++
++ struct mkrt_mfc_source *src;
++ WALK_LIST_FIRST(src, grp->sources)
++ mkrt_remove_mfc(p, src, group);
+ }
+
+ static void
- mkrt_mfc_clean(p, mg);
- HASH_REMOVE(p->mfc_groups, HASH_MFC, mg);
- mb_free(mg);
++mkrt_free_mfc_group(struct mkrt_proto *p, struct mkrt_mfc_group *grp)
+ {
-/*
- * An IGMP message received on the socket can be not only a packet received
- * from the network, but also a so-called upcall from the kernel. We must process them here.
- */
-static int
-mkrt_control_message(struct mkrt_proto *p, sock *sk, int len)
++ mkrt_reset_mfc_group(p, grp);
++ fib_delete(&p->mfc_groups, grp);
+ }
+
- struct igmpmsg *msg = (struct igmpmsg *) sk->rbuf;
- u8 igmp_type = * (u8 *) sk_rx_buffer(sk, &len);
++static void
++mkrt_rt_notify(struct proto *P, struct channel *c UNUSED, net *net, rte *new, rte *old UNUSED, ea_list *attrs UNUSED)
+ {
- switch (igmp_type)
- {
- case IGMPMSG_NOCACHE:
- mkrt_mfc_resolve(p, ipa_from_in4(msg->im_dst), ipa_from_in4(msg->im_src), msg->im_vif);
- return 1;
-
- case IGMPMSG_WRONGVIF:
- case IGMPMSG_WHOLEPKT:
- /* Neither should ever happen. IGMPMSG_WRONGVIF is a common situation,
- * and this upcall is called only when switching to (S,G) tree in other
- * PIM variants.
- *
- * Similarly, the WHOLEPKT should be called only when we add the register
- * VIF and ask kernel for giving us whole packets
- */
- return 1;
-
- default:
- return 0;
- }
++ struct mkrt_proto *p = (void *) P;
++ struct mkrt_mfc_group *grp = fib_find(&p->mfc_groups, net->n.addr);
+
-mkrt_rx_hook(sock *sk, int len)
++ if (!grp)
++ return;
++
++ /* Drop all MFC entries (possibly along with the state information) for a group */
++ if (new)
++ mkrt_reset_mfc_group(p, grp);
++ else
++ mkrt_free_mfc_group(p, grp);
+ }
+
++
++/*
++ * On MRT control socket, we receive not only regular IGMP messages but also
++ * so-called upcalls from the kernel. We must process them here.
++ */
++void mif_forward_igmp(struct mif_group *grp, struct mif *mif, sock *src, int len);
++
+ static int
- /* Do not forward upcalls, IGMP cannot parse them */
- if (mkrt_control_message(p, sk, len))
++mkrt_rx_hook(sock *sk, uint len)
+ {
+ struct mkrt_proto *p = sk->data;
++ struct igmpmsg *msg = (void *) sk->rbuf;
++ u8 igmp_type = * (u8 *) sk_rx_buffer(sk, &len);
+
- mkrt_rx_forward_all(&mkrt_global.sockets, sk, len);
-
- struct mkrt_iface *ifa = mkrt_iface_find(p, sk->lifindex);
- if (ifa)
- mkrt_rx_forward_all(&ifa->sockets, sk, len);
++ switch (igmp_type)
++ {
++ case IGMPMSG_NOCACHE:
++ mkrt_resolve_mfc(p, ip4_from_in4(msg->im_src), ip4_from_in4(msg->im_dst), msg->im_vif);
+ return 1;
+
- return 1;
++ case IGMPMSG_WRONGVIF:
++ case IGMPMSG_WHOLEPKT:
++ /* These should not happen unless some PIM-specific MRT options are enabled */
++ return 1;
+
- log(L_TRACE "IGMP error: %m", err);
-}
-
-static void
-mkrt_preconfig(struct protocol *P UNUSED, struct config *c UNUSED)
-{
- mkrt_cf = NULL;
-}
-
-struct proto_config *
-mkrt_config_init(int class)
-{
- if (mkrt_cf)
- cf_error("Kernel multicast route syncer already defined");
-
- mkrt_cf = (struct mkrt_config *) proto_config_new(&proto_mkrt, class);
- return (struct proto_config *) mkrt_cf;
-}
-
-void
-mkrt_config_finish(struct proto_config *pc)
-{
- struct channel_config *cc = proto_cf_main_channel(pc);
-
- if (!cc)
- cc = channel_config_new(NULL, NET_MGRP4, pc);
++ default:
++ // FIXME: Use sk->lifindex or msg->im_vif ?
++ mif_forward_igmp(p->mif_group, NULL, sk, len);
++ return 1;
++ }
+ }
+
+ static void
+ mkrt_err_hook(sock *sk, int err)
+ {
- cc->ra_mode = RA_OPTIMAL;
++ struct mkrt_proto *p = sk->data;
+
-mkrt_init_sock(struct mkrt_proto *p)
++ log(L_ERR "%s: Socket error: %M", p->p.name, err);
+ }
+
+ static int
- sock *sk;
-
- if (!(sk = sk_new(p->p.pool)))
- goto err;
-
++mkrt_open_socket(struct mkrt_proto *p)
+ {
- goto err_sk;
++ sock *sk = sk_new(p->p.pool);
+ sk->type = SK_IP;
+ sk->subtype = SK_IPV4;
+ sk->dport = IPPROTO_IGMP;
+ sk->flags = SKF_LADDR_RX;
+
+ sk->data = p;
+ sk->ttl = 1;
+ sk->rx_hook = mkrt_rx_hook;
+ sk->err_hook = mkrt_err_hook;
+
+ sk->rbsize = 4096;
+ sk->tbsize = 0;
+
+ if (sk_open(sk) < 0)
- p->igmp_sock = sk;
++ {
++ sk_log_error(sk, p->p.name);
++ goto err;
++ }
+
- int v = 1;
- if (mkrt_call(p, MRT_INIT, &v, sizeof(v)) < 0)
- {
- if (errno == EADDRINUSE)
- log(L_ERR "Mkernel: Another multicast routing daemon is running");
- else
- log(L_ERR "Mkernel: Cannot enable multicast features in kernel: %m", errno);
- goto err_sk;
- }
++ if (sk_mrt_init4(sk) < 0)
++ {
++ if (errno == EADDRINUSE)
++ log(L_ERR "%s: Another multicast daemon is running", p->p.name);
++ else
++ log(L_ERR "%s: Cannot enable multicast in kernel: %m", p->p.name);
+
- log(L_DEBUG "Multicast control socket open with fd %i", sk->fd);
- return 0;
++ goto err;
++ }
+
-err_sk:
- rfree(sk);
- p->igmp_sock = NULL;
++ p->mrt_sock = sk;
++ return 1;
+
- return -1;
+ err:
-void
-mkrt_rt_notify(struct proto *P, struct channel *c, net *net, rte *new, rte *old, ea_list *attrs)
++ rfree(sk);
++ return 0;
+ }
+
- struct mkrt_proto *p = (struct mkrt_proto *) P;
- net_addr *n = net->n.addr;
- struct mkrt_mfc_group *mg = mkrt_mfc_get(p, net_prefix(n));
-
- /* Drop all MFC entries (possibly along with the state information) for a group */
- if (new)
- mkrt_mfc_clean(p, mg);
- else
- mkrt_mfc_free(p, mg);
++static void
++mkrt_close_socket(struct mkrt_proto *p)
+ {
-mkrt_if_notify(struct proto *P, uint flags, struct iface *iface)
++ sk_mrt_done4(p->mrt_sock);
++ rfree(p->mrt_sock);
++ p->mrt_sock = NULL;
+ }
+
++
++/*
++ * Protocol glue
++ */
++
++static struct mkrt_config *mkrt_cf;
++
+ static void
- struct mkrt_proto *p = (struct mkrt_proto *) P;
++mkrt_preconfig(struct protocol *P UNUSED, struct config *c UNUSED)
+ {
- if (iface->flags & IF_IGNORE)
- return;
++ mkrt_cf = NULL;
++}
+
- if (flags & IF_CHANGE_UP)
- mkrt_add_vif(p, iface);
++struct proto_config *
++mkrt_init_config(int class)
++{
++ if (mkrt_cf)
++ cf_error("Multicast kernel protocol already defined");
++
++ mkrt_cf = (struct mkrt_config *) proto_config_new(&proto_unix_mkrt, class);
++ return (struct proto_config *) mkrt_cf;
++}
+
- if (flags & IF_CHANGE_DOWN)
- mkrt_del_vif(p, iface);
++void
++mkrt_postconfig(struct proto_config *CF)
++{
++ // struct mkrt_config *cf = (void *) CF;
+
-mkrt_init(struct proto_config *c)
++ if (EMPTY_LIST(CF->channels))
++ cf_error("Channel not specified");
+ }
+
+ static struct proto *
- struct mkrt_proto *p = proto_new(c);
++mkrt_init(struct proto_config *CF)
+ {
- p->p.main_channel = proto_add_channel(&p->p, proto_cf_main_channel(c));
++ struct mkrt_proto *p = proto_new(CF);
+
- p->p.if_notify = mkrt_if_notify;
++ p->p.main_channel = proto_add_channel(&p->p, proto_cf_main_channel(CF));
+
+ p->p.rt_notify = mkrt_rt_notify;
- struct mkrt_proto *p = (struct mkrt_proto *) P;
++
++ p->mif_group = global_mif_group;
+
+ return &p->p;
+ }
+
+ static int
+ mkrt_start(struct proto *P)
+ {
- p->vif_count = 0;
++ struct mkrt_proto *p = (void *) P;
+
- HASH_INIT(p->mfc_groups, p->p.pool, 6);
++ fib_init(&p->mfc_groups, p->p.pool, NET_MGRP4, sizeof(struct mkrt_mfc_group),
++ OFFSETOF(struct mkrt_mfc_group, n), 6, mkrt_init_mfc);
+
- if (mkrt_init_sock(p) < 0)
- return PS_DOWN;
++ if (!mkrt_open_socket(p))
++ return PS_START;
+
- struct mkrt_proto *p = (struct mkrt_proto *) P;
- mkrt_call(p, MRT_DONE, NULL, 0);
- rfree(p->igmp_sock);
++ mkrt_register_mif_group(p, p->mif_group);
+
+ return PS_UP;
+ }
+
+ static int
+ mkrt_shutdown(struct proto *P)
+ {
- struct mkrt_proto *p = (struct mkrt_proto *) P;
- struct mkrt_mfc_source *s;
-
- debug("\tVIFs as in bitmaps:\n\t\t");
- for (int i = MAXVIFS; i >= 0; i--)
- if (p->vif_map[i])
- debug("%s ", p->vif_map[i]->name);
- debug("\n\t(S,G) entries in MFC in kernel:\n");
- HASH_WALK(p->mfc_groups, next, group)
- {
- WALK_LIST(s, group->sources)
- debug("\t\t(%I, %I, %s) -> %b %b\n", s->addr, group->ga, p->vif_map[s->vifi]->name, s->iifs, s->oifs);
- }
- HASH_WALK_END;
++ struct mkrt_proto *p = (void *) P;
++
++ if (p->p.proto_state == PS_START)
++ return PS_DOWN;
++
++ mkrt_unregister_mif_group(p, p->mif_group);
++ mkrt_close_socket(p);
++
+ return PS_DOWN;
+ }
+
++static int
++mkrt_reconfigure(struct proto *p, struct proto_config *CF)
++{
++ // struct mkrt_config *o = (void *) p->cf;
++ // struct mkrt_config *n = (void *) CF;
++
++ if (!proto_configure_channel(p, &p->main_channel, proto_cf_main_channel(CF)))
++ return 0;
++
++ return 1;
++}
++
+ static void
+ mkrt_dump(struct proto *P)
+ {
-struct protocol proto_mkrt = {
- .name = "mkernel",
- .template = "mkernel%d",
- .proto_size = sizeof(struct mkrt_proto),
- .config_size = sizeof(struct proto_config),
- .channel_mask = NB_MGRP,
- .preconfig = mkrt_preconfig,
- .init = mkrt_init,
- .start = mkrt_start,
- .shutdown = mkrt_shutdown,
- .dump = mkrt_dump,
++ struct mkrt_proto *p = (void *) P;
++
++ debug("\t(S,G) entries in MFC in kernel:\n");
++ FIB_WALK(&p->mfc_groups, struct mkrt_mfc_group, grp)
++ {
++ struct mkrt_mfc_source *src;
++ WALK_LIST(src, grp->sources)
++ debug("\t\t(%I4, %I4, %d) -> %b %b\n",
++ src->addr, net4_prefix(grp->n.addr), src->parent, src->iifs, src->oifs);
++ }
++ FIB_WALK_END;
+ }
+
++
++struct protocol proto_unix_mkrt = {
++ .name = "MKernel",
++ .template = "mkernel%d",
++ .channel_mask = NB_MGRP4,
++ .proto_size = sizeof(struct mkrt_proto),
++ .config_size = sizeof(struct mkrt_config),
++ .preconfig = mkrt_preconfig,
++ .postconfig = mkrt_postconfig,
++ .init = mkrt_init,
++ .start = mkrt_start,
++ .shutdown = mkrt_shutdown,
++ .reconfigure = mkrt_reconfigure,
++ .dump = mkrt_dump,
+ };