/* Ugly structure offset handling macros */
+#define SAME_TYPE(a, b) ({ int _ = ((a) != (b)); !_; })
+#define TYPE_CAST(from, to, what) ( SAME_TYPE(((from) NULL), (what)), ((to) (what)))
+
#define OFFSETOF(s, i) ((size_t) &((s *)0)->i)
-#define SKIP_BACK(s, i, p) ((s *)((char *)p - OFFSETOF(s, i)))
+#define SKIP_BACK(s, i, p) ({ s *_ptr = ((s *)((char *)p - OFFSETOF(s, i))); SAME_TYPE(&_ptr->i, p); _ptr; })
#define BIRD_ALIGN(s, a) (((s)+a-1)&~(a-1))
#define CPU_STRUCT_ALIGN (alignof(max_align_t))
--- /dev/null
+/*
+ * BIRD -- Settle timer
+ *
+ * (c) 2022 Maria Matejka <mq@jmq.cz>
+ * (c) 2022 CZ.NIC z.s.p.o.
+ *
+ * Can be freely distributed and used under the terms of the GNU GPL.
+ */
+
+#ifndef _BIRD_SETTLE_H_
+#define _BIRD_SETTLE_H_
+
+#include "lib/birdlib.h"
+#include "lib/timer.h"
+
+struct settle_config {
+ btime min, max;
+};
+
+struct settle {
+ union {
+ /* Timer hook polymorphism. */
+ struct {
+ resource _r;
+ void (*hook)(struct settle *);
+ };
+ timer tm;
+ };
+ struct settle_config cf;
+ btime started;
+};
+
+STATIC_ASSERT(OFFSETOF(struct settle, hook) == OFFSETOF(struct settle, tm) + OFFSETOF(timer, hook));
+
+#define SETTLE_INIT(_cfp, _hook, _data) (struct settle) { .tm = { .data = (_data), .hook = TYPE_CAST(void (*)(struct settle *), void (*)(struct timer *), (_hook)), }, .cf = ({ASSERT_DIE((_cfp)->min <= (_cfp)->max); *(_cfp); }), }
+
+
+static inline void settle_init(struct settle *s, struct settle_config *cf, void (*hook)(struct settle *), void *data)
+{
+ *s = SETTLE_INIT(cf, hook, data);
+}
+
+#define settle_active(s) tm_active(&(s)->tm)
+
+static inline void settle_kick(struct settle *s)
+{
+ if (!tm_active(&s->tm))
+ {
+ s->started = current_time();
+ tm_set(&s->tm, s->started + s->cf.min);
+ }
+ else
+ {
+ btime now = current_time();
+ tm_set(&s->tm, MIN_(now + s->cf.min, s->started + s->cf.max));
+ }
+}
+
+static inline void settle_cancel(struct settle *s)
+{
+ tm_stop(&s->tm);
+}
+
+#endif
-src := aggregator.c
+src := aggregator.c trie.c
obj := $(src-o-files)
$(all-daemon)
$(cf-local)
/*
- * BIRD Internet Routing Daemon -- Route aggregation
+ * BIRD Internet Routing Daemon -- Route aggregation
*
- * (c) 2023--2023 Igor Putovny <igor.putovny@nic.cz>
- * (c) 2023 CZ.NIC, z.s.p.o.
+ * (c) 2023--2025 Igor Putovny <igor.putovny@nic.cz>
+ * (c) 2023--2025 Maria Matejka <mq@ucw.cz>
+ * (c) 2025 CZ.NIC, z.s.p.o.
*
* Can be freely distributed and used under the terms of the GNU GPL.
*/
/**
- * DOC: Route aggregation
+ * DOC: Aggregator protocol
*
- * This is an implementation of route aggregation functionality.
- * It enables user to specify a set of route attributes in the configuarion file
- * and then, for a given destination (net), aggregate routes with the same
- * values of these attributes into a single multi-path route.
+ * The purpose of the aggregator protocol is to aggregate routes based on
+ * user-specified set of route attributes. It can be used for aggregating
+ * routes for a given destination (net) or for aggregating prefixes.
*
- * Structure &channel contains pointer to aggregation list which is represented
- * by &aggr_list_linearized. In rt_notify_aggregated(), attributes from this
- * list are evaluated for every route of a given net and results are stored
- * in &rte_val_list which contains pointer to this route and array of &f_val.
- * Array of pointers to &rte_val_list entries is sorted using
- * sort_rte_val_list(). For comparison of &f_val structures, val_compare()
- * is used. Comparator function is written so that sorting is stable. If all
- * attributes have the same values, routes are compared by their global IDs.
+ * Aggregation of routes for networks means that for each destination, routes
+ * with the same values of attributes will be aggregated into a single
+ * multi-path route. Aggregation is performed by inserting routes into a hash
+ * table based on values of their attributes and generating new routes from
+ * the routes in the same bucket. Buckets are represented by @aggregator_bucket,
+ * which contains linked list of @aggregator_route.
*
- * After sorting, &rte_val_list entries containing equivalent routes will be
- * adjacent to each other. Function process_rte_list() iterates through these
- * entries to identify sequences of equivalent routes. New route will be
- * created for each such sequence, even if only from a single route.
- * Only attributes from the aggreagation list will be set for the new route.
- * New &rta is created and prepare_rta() is used to copy static and dynamic
- * attributes to new &rta from &rta of the original route. New route is created
- * by create_merged_rte() from new &rta and exported to the routing table.
+ * Aggregation of prefixes aggregates a given set of prefixes into another set
+ * of prefixes. It offers a reduction in number of prefixes without changing
+ * the routing semantics. Aggregator is capable of processing incremental
+ * updates.
*/
#undef LOCAL_DEBUG
#include "nest/bird.h"
#include "nest/iface.h"
#include "filter/filter.h"
-#include "aggregator.h"
+#include "proto/aggregator/aggregator.h"
+
+extern linpool *rte_update_pool;
-#include <stdlib.h>
/*
-#include "nest/route.h"
-#include "nest/iface.h"
-#include "lib/resource.h"
-#include "lib/event.h"
-#include "lib/timer.h"
-#include "lib/string.h"
-#include "conf/conf.h"
-#include "filter/filter.h"
-#include "filter/data.h"
-#include "lib/hash.h"
-#include "lib/string.h"
-#include "lib/alloca.h"
-#include "lib/flowspec.h"
-*/
+ * Allocate unique ID for bucket
+ */
+static inline u32
+aggregator_get_new_bucket_id(struct aggregator_proto *p)
+{
+ u32 id = hmap_first_zero(&p->bucket_id_map);
+ hmap_set(&p->bucket_id_map, id);
+ return id;
+}
-extern linpool *rte_update_pool;
+/*
+ * Add @bucket to the list of bucket pointers in @p to position @bucket.id
+ */
+// TODO: enable to reset bucket ptr?
+static void
+aggregator_add_bucket(struct aggregator_proto *p, struct aggregator_bucket *bucket)
+{
+  ASSERT_DIE(p != NULL);
+  ASSERT_DIE(p->bucket_list != NULL);
+  ASSERT_DIE(bucket != NULL);
+
+  /* Bucket is already in the list */
+  if (bucket->id < p->bucket_list_size && p->bucket_list[bucket->id])
+    return;
+
+  const size_t old_size = p->bucket_list_size;
+
+  /* Reallocate if more space is needed */
+  if (bucket->id >= p->bucket_list_size)
+  {
+    /* NOTE(review): if bucket_list_size were ever 0 here, this doubling loop
+     * would never terminate (0 * 2 == 0). It is only initialized to
+     * BUCKET_LIST_INIT_SIZE in PREFIX_AGGR mode; in NET_AGGR mode the
+     * NULL-list assert above fires first. Please confirm this function is
+     * unreachable in NET_AGGR mode. */
+    while (bucket->id >= p->bucket_list_size)
+      p->bucket_list_size *= 2;
+
+    ASSERT_DIE(old_size < p->bucket_list_size);
+
+    p->bucket_list = mb_realloc(p->bucket_list, sizeof(p->bucket_list[0]) * p->bucket_list_size);
+    /* Zero the newly grown tail so unused slots read as "no bucket" */
+    memset(&p->bucket_list[old_size], 0, sizeof(p->bucket_list[0]) * (p->bucket_list_size - old_size));
+  }
+
+  ASSERT_DIE(bucket->id < p->bucket_list_size);
+  ASSERT_DIE(p->bucket_list[bucket->id] == NULL);
+
+  p->bucket_list[bucket->id] = bucket;
+  p->bucket_list_count++;
+}
+
+/*
+ * Withdraw all routes that are on the stack
+ */
+static void
+aggregator_withdraw_rte(struct aggregator_proto *p)
+{
+  /* Sanity limit: warn when an unexpectedly large batch of withdrawals piled up.
+   * Fixed the concatenated string literals: without the trailing space the
+   * message read "...was not expected.They will be...". */
+  if ((p->addr_type == NET_IP4 && p->rte_withdrawal_count > IP4_WITHDRAWAL_MAX_EXPECTED_LIMIT) ||
+      (p->addr_type == NET_IP6 && p->rte_withdrawal_count > IP6_WITHDRAWAL_MAX_EXPECTED_LIMIT))
+    log(L_WARN "This number of updates was not expected. "
+	"They will be processed, but please, contact the developers.");
+
+  struct rte_withdrawal_item *node = NULL;
+
+  /* Pop every pending item off the stack and announce the route withdrawal.
+   * Extra parentheses mark the assignment-in-condition as intentional. */
+  while ((node = p->rte_withdrawal_stack))
+  {
+    rte_update2(p->dst, &node->addr, NULL, node->bucket->last_src);
+    p->rte_withdrawal_stack = node->next;
+    p->rte_withdrawal_count--;
+  }
+
+  ASSERT_DIE(p->rte_withdrawal_stack == NULL);
+  ASSERT_DIE(p->rte_withdrawal_count == 0);
+
+  /* All items were allocated from this linpool; reclaim them in one sweep */
+  lp_flush(p->rte_withdrawal_pool);
+}
+
+static void
+aggregator_aggregate_on_feed_end(struct channel *C)
+{
+ struct aggregator_proto *p = SKIP_BACK(struct aggregator_proto, p, C->proto);
+
+ if (C != p->src)
+ return;
+
+ ASSERT_DIE(p->aggr_mode == PREFIX_AGGR);
+ ASSERT_DIE(p->root != NULL);
+
+ p->initial_feed = 0;
+ aggregator_aggregate(p);
+}
/*
* Set static attribute in @rta from static attribute in @old according to @sa.
*/
static void
-rta_set_static_attr(struct rta *rta, const struct rta *old, struct f_static_attr sa)
+aggregator_rta_set_static_attr(struct rta *rta, const struct rta *old, struct f_static_attr sa)
{
switch (sa.sa_code)
{
* @count: number of &f_val entries
*/
static int
-same_val_list(const struct f_val *v1, const struct f_val *v2, uint len)
+aggregator_same_val_list(const struct f_val *v1, const struct f_val *v2, u32 len)
{
- for (uint i = 0; i < len; i++)
+ for (u32 i = 0; i < len; i++)
if (!val_same(&v1[i], &v2[i]))
return 0;
}
/*
- * Create and export new merged route.
- * @old: first route in a sequence of equivalent routes that are to be merged
- * @rte_val: first element in a sequence of equivalent rte_val_list entries
- * @length: number of equivalent routes that are to be merged (at least 1)
- * @ail: aggregation list
+ * Create and export new merged route
*/
-static void
+void
aggregator_bucket_update(struct aggregator_proto *p, struct aggregator_bucket *bucket, struct network *net)
{
/* Empty bucket */
rta->source = RTS_AGGREGATED;
rta->scope = SCOPE_UNIVERSE;
- struct ea_list *eal = allocz(sizeof(struct ea_list) + sizeof(struct eattr) * p->aggr_on_da_count);
+ struct ea_list *eal = allocz(sizeof(*eal) + sizeof(struct eattr) * p->aggr_on_da_count);
eal->next = NULL;
eal->count = 0;
rta->eattrs = eal;
/* Seed the attributes from aggregator rule */
- for (uint i = 0; i < p->aggr_on_count; i++)
+ for (u32 i = 0; i < p->aggr_on_count; i++)
{
if (p->aggr_on[i].type == AGGR_ITEM_DYNAMIC_ATTR)
{
eal->attrs[eal->count++] = *e;
}
else if (p->aggr_on[i].type == AGGR_ITEM_STATIC_ATTR)
- rta_set_static_attr(rta, bucket->rte->attrs, p->aggr_on[i].sa);
+ aggregator_rta_set_static_attr(rta, bucket->rte->attrs, p->aggr_on[i].sa);
}
- struct rte *new = rte_get_temp(rta, bucket->rte->src);
+ struct rte *new = rte_get_temp(rta, p->p.main_source);
new->net = net;
- /*
- log("=============== CREATE MERGED ROUTE ===============");
- log("New route created: id = %d, protocol: %s", new->src->global_id, new->src->proto->name);
- log("===================================================");
- */
+ if (p->logging)
+ {
+ log("=============== CREATE MERGED ROUTE ===============");
+ log("New route created: id = %d, protocol: %s", new->src->global_id, new->src->proto->name);
+ log("===================================================");
+ }
/* merge filter needs one argument called "routes" */
struct f_val val = {
/* We actually don't want this route */
case F_REJECT:
if (bucket->last_src)
- rte_update2(p->dst, net->n.addr, NULL, bucket->last_src);
+ rte_update2(p->dst, net->n.addr, NULL, bucket->last_src);
break;
}
{
if (new_src)
rt_lock_source(new_src);
+
if (bucket->last_src)
rt_unlock_source(bucket->last_src);
* and store result in @pos.
*/
static void
-eval_static_attr(const struct rte *rt1, struct f_static_attr sa, struct f_val *pos)
+aggregator_eval_static_attr(const struct rte *rt1, struct f_static_attr sa, struct f_val *pos)
{
const struct rta *rta = rt1->attrs;
switch (sa.sa_code)
{
- case SA_NET: RESULT(sa.f_type, net, rt1->net->n.addr); break;
- case SA_FROM: RESULT(sa.f_type, ip, rta->from); break;
- case SA_GW: RESULT(sa.f_type, ip, rta->nh.gw); break;
- case SA_PROTO: RESULT(sa.f_type, s, rt1->src->proto->name); break;
- case SA_SOURCE: RESULT(sa.f_type, i, rta->source); break;
- case SA_SCOPE: RESULT(sa.f_type, i, rta->scope); break;
- case SA_DEST: RESULT(sa.f_type, i, rta->dest); break;
- case SA_IFNAME: RESULT(sa.f_type, s, rta->nh.iface ? rta->nh.iface->name : ""); break;
- case SA_IFINDEX: RESULT(sa.f_type, i, rta->nh.iface ? rta->nh.iface->index : 0); break;
- case SA_WEIGHT: RESULT(sa.f_type, i, rta->nh.weight + 1); break;
- case SA_PREF: RESULT(sa.f_type, i, rta->pref); break;
- case SA_GW_MPLS: RESULT(sa.f_type, i, rta->nh.labels ? rta->nh.label[0] : MPLS_NULL); break;
+ case SA_NET: RESULT(sa.f_type, net, rt1->net->n.addr); break;
+ case SA_FROM: RESULT(sa.f_type, ip, rta->from); break;
+ case SA_GW: RESULT(sa.f_type, ip, rta->nh.gw); break;
+ case SA_PROTO: RESULT(sa.f_type, s, rt1->src->proto->name); break;
+ case SA_SOURCE: RESULT(sa.f_type, i, rta->source); break;
+ case SA_SCOPE: RESULT(sa.f_type, i, rta->scope); break;
+ case SA_DEST: RESULT(sa.f_type, i, rta->dest); break;
+ case SA_IFNAME: RESULT(sa.f_type, s, rta->nh.iface ? rta->nh.iface->name : ""); break;
+ case SA_IFINDEX: RESULT(sa.f_type, i, rta->nh.iface ? rta->nh.iface->index : 0); break;
+ case SA_WEIGHT: RESULT(sa.f_type, i, rta->nh.weight + 1); break;
+ case SA_PREF: RESULT(sa.f_type, i, rta->pref); break;
+ case SA_GW_MPLS: RESULT(sa.f_type, i, rta->nh.labels ? rta->nh.label[0] : MPLS_NULL); break;
default:
bug("Invalid static attribute access (%u/%u)", sa.f_type, sa.sa_code);
}
* and store result in @pos.
*/
static void
-eval_dynamic_attr(const struct rte *rt1, struct f_dynamic_attr da, struct f_val *pos)
+aggregator_eval_dynamic_attr(const struct rte *rt1, struct f_dynamic_attr da, struct f_val *pos)
{
const struct rta *rta = rt1->attrs;
const struct eattr *e = ea_find(rta->eattrs, da.ea_code);
#undef RESULT_VOID
}
-static inline u32 aggr_route_hash(const rte *e)
+static inline u32
+aggregator_route_hash(const rte *e)
{
struct {
net *net;
#define AGGR_RTE_KEY(n) (&(n)->rte)
#define AGGR_RTE_NEXT(n) ((n)->next_hash)
#define AGGR_RTE_EQ(a,b) (((a)->src == (b)->src) && ((a)->net == (b)->net))
-#define AGGR_RTE_FN(_n) aggr_route_hash(_n)
+#define AGGR_RTE_FN(_n) aggregator_route_hash(_n)
#define AGGR_RTE_ORDER 4 /* Initial */
#define AGGR_RTE_REHASH aggr_rte_rehash
#define AGGR_BUCK_KEY(n) (n)
#define AGGR_BUCK_NEXT(n) ((n)->next_hash)
-#define AGGR_BUCK_EQ(a,b) (((a)->hash == (b)->hash) && (same_val_list((a)->aggr_data, (b)->aggr_data, p->aggr_on_count)))
+#define AGGR_BUCK_EQ(a,b) (((a)->hash == (b)->hash) && (aggregator_same_val_list((a)->aggr_data, (b)->aggr_data, p->aggr_on_count)))
#define AGGR_BUCK_FN(n) ((n)->hash)
#define AGGR_BUCK_ORDER 4 /* Initial */
{
struct aggregator_proto *p = SKIP_BACK(struct aggregator_proto, p, P);
ASSERT_DIE(src_ch == p->src);
+
struct aggregator_bucket *new_bucket = NULL, *old_bucket = NULL;
- struct aggregator_route *old_route = NULL;
+ struct aggregator_route *new_route = NULL, *old_route = NULL;
+
+ /* Ignore all updates if protocol is not up */
+ if (p->p.proto_state != PS_UP)
+ return;
/* Find the objects for the old route */
if (old)
return;
/* Evaluate route attributes. */
- struct aggregator_bucket *tmp_bucket = sl_allocz(p->bucket_slab);
+ struct aggregator_bucket *tmp_bucket = allocz(sizeof(*tmp_bucket) + sizeof(tmp_bucket->aggr_data[0]) * p->aggr_on_count);
+ ASSERT_DIE(tmp_bucket->id == 0);
- for (uint val_idx = 0; val_idx < p->aggr_on_count; val_idx++)
+ for (u32 val_idx = 0; val_idx < p->aggr_on_count; val_idx++)
{
int type = p->aggr_on[val_idx].type;
struct f_val *pos = &tmp_bucket->aggr_data[val_idx];
if (fret > F_RETURN)
log(L_WARN "%s.%s: Wrong number of items left on stack after evaluation of aggregation list", rt1->src->proto->name, rt1->sender->name);
- switch (pos->type) {
- case T_VOID:
- case T_INT:
- case T_BOOL:
- case T_PAIR:
- case T_QUAD:
- case T_ENUM:
- case T_IP:
- case T_EC:
- case T_LC:
- case T_RD:
- /* Fits, OK */
- break;
-
- default:
- log(L_WARN "%s.%s: Expression evaluated to type %s unsupported by aggregator. Store this value as a custom attribute instead", new->src->proto->name, new->sender->name, f_type_name(pos->type));
- *pos = (struct f_val) { .type = T_INT, .val.i = 0 };
- }
+ switch (pos->type)
+ {
+ case T_VOID:
+ case T_INT:
+ case T_BOOL:
+ case T_PAIR:
+ case T_QUAD:
+ case T_ENUM:
+ case T_IP:
+ case T_EC:
+ case T_LC:
+ case T_RD:
+ /* Fits, OK */
+ break;
+
+ default:
+ log(L_WARN "%s.%s: Expression evaluated to type %s unsupported by aggregator. Store this value as a custom attribute instead", new->src->proto->name, new->sender->name, f_type_name(pos->type));
+ *pos = (struct f_val) { .type = T_INT, .val.i = 0 };
+ }
break;
}
- case AGGR_ITEM_STATIC_ATTR: {
- eval_static_attr(new, p->aggr_on[val_idx].sa, pos);
+ case AGGR_ITEM_STATIC_ATTR:
+ aggregator_eval_static_attr(new, p->aggr_on[val_idx].sa, pos);
break;
- }
- case AGGR_ITEM_DYNAMIC_ATTR: {
- eval_dynamic_attr(new, p->aggr_on[val_idx].da, pos);
+ case AGGR_ITEM_DYNAMIC_ATTR:
+ aggregator_eval_dynamic_attr(new, p->aggr_on[val_idx].da, pos);
break;
- }
default:
break;
/* Compute the hash */
u64 haux;
mem_hash_init(&haux);
- for (uint i = 0; i < p->aggr_on_count; i++)
+
+ for (u32 i = 0; i < p->aggr_on_count; i++)
{
mem_hash_mix_num(&haux, tmp_bucket->aggr_data[i].type);
switch (tmp_bucket->aggr_data[i].type)
{
- case T_VOID:
- break;
- case T_INT:
- case T_BOOL:
- case T_PAIR:
- case T_QUAD:
- case T_ENUM:
- MX(i);
- break;
- case T_EC:
- case T_RD:
- MX(ec);
- break;
- case T_LC:
- MX(lc);
- break;
- case T_IP:
- MX(ip);
- break;
- case T_NET:
- mem_hash_mix_num(&haux, net_hash(IT(net)));
- break;
- case T_STRING:
- mem_hash_mix_str(&haux, IT(s));
- break;
- case T_PATH_MASK:
- mem_hash_mix(&haux, IT(path_mask), sizeof(*IT(path_mask)) + IT(path_mask)->len * sizeof (IT(path_mask)->item));
- break;
- case T_PATH:
- case T_CLIST:
- case T_ECLIST:
- case T_LCLIST:
- case T_BYTESTRING:
- mem_hash_mix(&haux, IT(ad)->data, IT(ad)->length);
- break;
- case T_NONE:
- case T_PATH_MASK_ITEM:
- case T_ROUTE:
- case T_ROUTES_BLOCK:
- bug("Invalid type %s in hashing", f_type_name(tmp_bucket->aggr_data[i].type));
- case T_SET:
- MX(t);
- break;
- case T_PREFIX_SET:
- MX(ti);
- break;
+ case T_VOID:
+ break;
+ case T_INT:
+ case T_BOOL:
+ case T_PAIR:
+ case T_QUAD:
+ case T_ENUM:
+ MX(i);
+ break;
+ case T_EC:
+ case T_RD:
+ MX(ec);
+ break;
+ case T_LC:
+ MX(lc);
+ break;
+ case T_IP:
+ MX(ip);
+ break;
+ case T_NET:
+ mem_hash_mix_num(&haux, net_hash(IT(net)));
+ break;
+ case T_STRING:
+ mem_hash_mix_str(&haux, IT(s));
+ break;
+ case T_PATH_MASK:
+ mem_hash_mix(&haux, IT(path_mask), sizeof(*IT(path_mask)) + IT(path_mask)->len * sizeof (IT(path_mask)->item));
+ break;
+ case T_PATH:
+ case T_CLIST:
+ case T_ECLIST:
+ case T_LCLIST:
+ case T_BYTESTRING:
+ mem_hash_mix(&haux, IT(ad)->data, IT(ad)->length);
+ break;
+ case T_NONE:
+ case T_PATH_MASK_ITEM:
+ case T_ROUTE:
+ case T_ROUTES_BLOCK:
+ bug("Invalid type %s in hashing", f_type_name(tmp_bucket->aggr_data[i].type));
+ case T_SET:
+ MX(t);
+ break;
+ case T_PREFIX_SET:
+ MX(ti);
+ break;
}
}
/* Find the existing bucket */
if (new_bucket = HASH_FIND(p->buckets, AGGR_BUCK, tmp_bucket))
- sl_free(tmp_bucket);
+ ;
else
{
- new_bucket = tmp_bucket;
+ new_bucket = lp_allocz(p->bucket_pool, sizeof(*new_bucket) + sizeof(new_bucket->aggr_data[0]) * p->aggr_on_count);
+ memcpy(new_bucket, tmp_bucket, sizeof(*new_bucket) + sizeof(new_bucket->aggr_data[0]) * p->aggr_on_count);
HASH_INSERT2(p->buckets, AGGR_BUCK, p->p.pool, new_bucket);
+
+ new_bucket->id = aggregator_get_new_bucket_id(p);
+ aggregator_add_bucket(p, new_bucket);
}
/* Store the route attributes */
else
new->attrs = rta_lookup(new->attrs);
+ if (p->logging)
+ log("New rte: %p, net: %p, src: %p, hash: %x", new, new->net, new->src, aggregator_route_hash(new));
+
/* Insert the new route into the bucket */
- struct aggregator_route *arte = sl_alloc(p->route_slab);
+ struct aggregator_route *arte = lp_allocz(p->route_pool, sizeof(*arte));
+
*arte = (struct aggregator_route) {
.bucket = new_bucket,
.rte = *new,
};
+
arte->rte.next = new_bucket->rte,
new_bucket->rte = &arte->rte;
new_bucket->count++;
HASH_INSERT2(p->routes, AGGR_RTE, p->p.pool, arte);
+
+ /* New route */
+ new_route = arte;
+ ASSERT_DIE(new_route != NULL);
+
+ if (p->logging)
+ log("Inserting rte: %p, arte: %p, net: %p, src: %p, hash: %x",
+ &arte->rte, arte, arte->rte.net, arte->rte.src, aggregator_route_hash(&arte->rte));
}
/* Remove the old route from its bucket */
if (old_bucket)
{
for (struct rte **k = &old_bucket->rte; *k; k = &(*k)->next)
+ {
if (*k == &old_route->rte)
{
- *k = (*k)->next;
- break;
+ *k = (*k)->next;
+ break;
}
+ }
old_bucket->count--;
HASH_REMOVE2(p->routes, AGGR_RTE, p->p.pool, old_route);
rta_free(old_route->rte.attrs);
- sl_free(old_route);
}
- /* Announce changes */
- if (old_bucket)
- aggregator_bucket_update(p, old_bucket, net);
+ /* Aggregation within nets allows incremental updates */
+ if (p->aggr_mode == NET_AGGR)
+ {
+ /* Announce changes */
+ if (old_bucket)
+ aggregator_bucket_update(p, old_bucket, net);
+
+ if (new_bucket && (new_bucket != old_bucket))
+ aggregator_bucket_update(p, new_bucket, net);
+ }
+ else if (p->aggr_mode == PREFIX_AGGR)
+ {
+ if (!p->initial_feed)
+ {
+ aggregator_recompute(p, old_route, new_route);
- if (new_bucket && (new_bucket != old_bucket))
- aggregator_bucket_update(p, new_bucket, net);
+ /* Process route withdrawals triggered by recomputation */
+ aggregator_withdraw_rte(p);
+ }
+ }
/* Cleanup the old bucket if empty */
if (old_bucket && (!old_bucket->rte || !old_bucket->count))
{
ASSERT_DIE(!old_bucket->rte && !old_bucket->count);
HASH_REMOVE2(p->buckets, AGGR_BUCK, p->p.pool, old_bucket);
- sl_free(old_bucket);
}
}
aggregator_preexport(struct channel *C, struct rte *new)
{
struct aggregator_proto *p = SKIP_BACK(struct aggregator_proto, p, C->proto);
+
/* Reject our own routes */
if (new->sender == p->dst)
return -1;
cf->dst->debug = cf->src->debug;
}
+// TODO: set pools to NULL?
static struct proto *
aggregator_init(struct proto_config *CF)
{
proto_configure_channel(P, &p->src, cf->src);
proto_configure_channel(P, &p->dst, cf->dst);
- p->aggr_on_count = cf->aggr_on_count;
- p->aggr_on_da_count = cf->aggr_on_da_count;
- p->aggr_on = cf->aggr_on;
- p->merge_by = cf->merge_by;
+ p->aggr_mode = cf->aggr_mode;
+ p->aggr_on_count = cf->aggr_on_count;
+ p->aggr_on_da_count = cf->aggr_on_da_count;
+ p->aggr_on = cf->aggr_on;
+ p->merge_by = cf->merge_by;
+ p->logging = cf->logging;
+ p->bucket_list = NULL;
+ p->bucket_list_size = 0;
+ p->bucket_list_count = 0;
P->rt_notify = aggregator_rt_notify;
P->preexport = aggregator_preexport;
+ P->feed_end = aggregator_aggregate_on_feed_end;
return P;
}
+/*
+ * Initialize hash table and create default route
+ *
+ * TODO: this should probably be called at protocol start
+ * and cleaned up at shutdown, shouldn't it?
+ */
+static void
+aggregator_trie_init(struct aggregator_proto *p)
+{
+ /* Zero prefix for default route */
+ ip_addr prefix = (p->addr_type == NET_IP4) ? ipa_from_ip4(IP4_NONE) : ipa_from_ip6(IP6_NONE);
+
+ struct net_addr addr = { 0 };
+ net_fill_ipa(&addr, prefix, 0);
+
+ /* Create net for zero prefix */
+ struct network *default_net = mb_allocz(p->p.pool, sizeof(*default_net) + sizeof(addr));
+ net_copy(default_net->n.addr, &addr);
+
+ /* Create route attributes with zero nexthop */
+ struct rta rta = { 0 };
+
+ /* Allocate bucket for root node */
+ struct aggregator_bucket *new_bucket = lp_allocz(p->bucket_pool, sizeof(*new_bucket));
+
+ u64 haux = 0;
+ mem_hash_init(&haux);
+ new_bucket->hash = mem_hash_value(&haux);
+
+ /* Assign ID to the root node bucket */
+ new_bucket->id = aggregator_get_new_bucket_id(p);
+ aggregator_add_bucket(p, new_bucket);
+
+ struct aggregator_route *arte = lp_allocz(p->route_pool, sizeof(*arte));
+
+ *arte = (struct aggregator_route) {
+ .bucket = new_bucket,
+ .rte = { .attrs = rta_lookup(&rta) },
+ };
+
+ /* Put route into bucket */
+ arte->rte.next = new_bucket->rte;
+ new_bucket->rte = &arte->rte;
+ new_bucket->count++;
+
+ arte->rte.net = default_net;
+ default_net->routes = &arte->rte;
+
+ HASH_INSERT2(p->routes, AGGR_RTE, p->p.pool, arte);
+ HASH_INSERT2(p->buckets, AGGR_BUCK, p->p.pool, new_bucket);
+
+ /* Allocate and initialize root node */
+ p->root = aggregator_root_init(new_bucket, p->trie_slab);
+}
+
static int
aggregator_start(struct proto *P)
{
struct aggregator_proto *p = SKIP_BACK(struct aggregator_proto, p, P);
- p->bucket_slab = sl_new(P->pool, sizeof(struct aggregator_bucket) + AGGR_DATA_MEMSIZE);
+ ASSERT_DIE(p->bucket_pool == NULL);
+ ASSERT_DIE(p->route_pool == NULL);
+ ASSERT_DIE(p->trie_slab == NULL);
+ ASSERT_DIE(p->root == NULL);
+
+ p->addr_type = p->src->table->addr_type;
+
+ p->bucket_pool = lp_new(P->pool);
HASH_INIT(p->buckets, P->pool, AGGR_BUCK_ORDER);
- p->route_slab = sl_new(P->pool, sizeof(struct aggregator_route));
+ p->route_pool = lp_new(P->pool);
HASH_INIT(p->routes, P->pool, AGGR_RTE_ORDER);
p->reload_buckets = (event) {
.data = p,
};
+ p->initial_feed = 1;
+
+ hmap_init(&p->bucket_id_map, p->p.pool, 1024);
+ hmap_set(&p->bucket_id_map, 0); /* 0 is default value, do not use it as ID */
+
+ if (p->aggr_mode == PREFIX_AGGR)
+ {
+ ASSERT_DIE(p->trie_slab == NULL);
+ p->trie_slab = sl_new(P->pool, sizeof(struct trie_node));
+
+ ASSERT_DIE(p->bucket_list == NULL);
+ ASSERT_DIE(p->bucket_list_size == 0);
+ ASSERT_DIE(p->bucket_list_count == 0);
+
+ p->bucket_list_size = BUCKET_LIST_INIT_SIZE;
+ p->bucket_list = mb_allocz(p->p.pool, sizeof(p->bucket_list[0]) * p->bucket_list_size);
+
+ p->rte_withdrawal_pool = lp_new(P->pool);
+ p->rte_withdrawal_count = 0;
+
+ aggregator_trie_init(p);
+ }
+
return PS_UP;
}
static int
-aggregator_shutdown(struct proto *P)
+aggregator_shutdown(struct proto *P UNUSED)
+{
+ return PS_DOWN;
+}
+
+static void
+aggregator_cleanup(struct proto *P)
{
struct aggregator_proto *p = SKIP_BACK(struct aggregator_proto, p, P);
- HASH_WALK_DELSAFE(p->buckets, next_hash, b)
- {
- while (b->rte)
- {
- struct aggregator_route *arte = SKIP_BACK(struct aggregator_route, rte, b->rte);
- b->rte = arte->rte.next;
- b->count--;
- HASH_REMOVE(p->routes, AGGR_RTE, arte);
- rta_free(arte->rte.attrs);
- sl_free(arte);
- }
+ /*
+ * Linpools will be freed along with other protocol resources but pointers
+ * have to be set to NULL because protocol may be started again.
+ */
+ p->bucket_pool = NULL;
+ p->route_pool = NULL;
+ p->trie_slab = NULL;
+ p->rte_withdrawal_pool = NULL;
- ASSERT_DIE(b->count == 0);
- HASH_REMOVE(p->buckets, AGGR_BUCK, b);
- sl_free(b);
- }
- HASH_WALK_END;
+ p->root = NULL;
- return PS_DOWN;
+ p->bucket_list = NULL;
+ p->bucket_list_size = 0;
+ p->bucket_list_count = 0;
+
+ p->rte_withdrawal_stack = NULL;
+ p->rte_withdrawal_count = 0;
+
+ p->bucket_id_map = (struct hmap) { 0 };
+
+ p->initial_feed = 1;
}
static int
return 0;
/* Compare aggregator rule */
- for (uint i = 0; i < p->aggr_on_count; i++)
+ for (u32 i = 0; i < p->aggr_on_count; i++)
+ {
switch (cf->aggr_on[i].type)
{
case AGGR_ITEM_TERM:
- if (!f_same(cf->aggr_on[i].line, p->aggr_on[i].line))
- return 0;
- break;
+ if (!f_same(cf->aggr_on[i].line, p->aggr_on[i].line))
+ return 0;
+ break;
case AGGR_ITEM_STATIC_ATTR:
- if (memcmp(&cf->aggr_on[i].sa, &p->aggr_on[i].sa, sizeof(struct f_static_attr)) != 0)
- return 0;
- break;
+ if (memcmp(&cf->aggr_on[i].sa, &p->aggr_on[i].sa, sizeof(struct f_static_attr)) != 0)
+ return 0;
+ break;
case AGGR_ITEM_DYNAMIC_ATTR:
- if (memcmp(&cf->aggr_on[i].da, &p->aggr_on[i].da, sizeof(struct f_dynamic_attr)) != 0)
- return 0;
- break;
+ if (memcmp(&cf->aggr_on[i].da, &p->aggr_on[i].da, sizeof(struct f_dynamic_attr)) != 0)
+ return 0;
+ break;
default:
- bug("Broken aggregator rule");
+ bug("Broken aggregator rule");
}
+ }
/* Compare merge filter */
if (!f_same(cf->merge_by, p->merge_by))
return 1;
}
+static void
+aggregator_get_status(struct proto *P, byte *buf)
+{
+ struct aggregator_proto *p = SKIP_BACK(struct aggregator_proto, p, P);
+
+ if (p->p.proto_state == PS_DOWN)
+ buf[0] = 0;
+ else
+ {
+ if (p->aggr_mode == PREFIX_AGGR)
+ strcpy(buf, "prefix aggregation");
+ else
+ strcpy(buf, "net aggregation");
+ }
+}
+
struct protocol proto_aggregator = {
- .name = "Aggregator",
- .template = "aggregator%d",
- .class = PROTOCOL_AGGREGATOR,
- .preference = 1,
- .channel_mask = NB_ANY,
- .proto_size = sizeof(struct aggregator_proto),
- .config_size = sizeof(struct aggregator_config),
- .postconfig = aggregator_postconfig,
- .init = aggregator_init,
- .start = aggregator_start,
- .shutdown = aggregator_shutdown,
- .reconfigure = aggregator_reconfigure,
+ .name = "Aggregator",
+ .template = "aggregator%d",
+ .class = PROTOCOL_AGGREGATOR,
+ .preference = 1,
+ .channel_mask = NB_ANY,
+ .proto_size = sizeof(struct aggregator_proto),
+ .config_size = sizeof(struct aggregator_config),
+ .postconfig = aggregator_postconfig,
+ .init = aggregator_init,
+ .start = aggregator_start,
+ .shutdown = aggregator_shutdown,
+ .cleanup = aggregator_cleanup,
+ .reconfigure = aggregator_reconfigure,
+ .get_status = aggregator_get_status,
};
void
/*
* BIRD -- Aggregator Pseudoprotocol
*
- * (c) 2023 Igor Putovny <igor.putovny@nic.cz>
- * (c) 2023 Maria Matejka <mq@ucw.cz>
- * (c) 2023 CZ.NIC z.s.p.o.
+ * (c) 2023--2025 Igor Putovny <igor.putovny@nic.cz>
+ * (c) 2023--2025 Maria Matejka <mq@ucw.cz>
+ * (c) 2025 CZ.NIC z.s.p.o.
*
* Can be freely distributed and used under the terms of the GNU GPL.
- *
- * This file contains the data structures used by Babel.
*/
#ifndef _BIRD_AGGREGATOR_H_
#include "nest/protocol.h"
#include "lib/hash.h"
+#define BUCKET_LIST_INIT_SIZE 16
+#define POTENTIAL_BUCKETS_BITMAP_SIZE 8
+#define MAX_POTENTIAL_BUCKETS_COUNT ((int)(sizeof(u32) * 8 * POTENTIAL_BUCKETS_BITMAP_SIZE))
+
+#define IP4_WITHDRAWAL_MAX_EXPECTED_LIMIT 100
+#define IP6_WITHDRAWAL_MAX_EXPECTED_LIMIT 200
+
+enum aggregation_mode {
+ NET_AGGR, PREFIX_AGGR,
+};
+
struct aggregator_config {
struct proto_config c;
struct channel_config *src, *dst;
- uint aggr_on_count;
- uint aggr_on_da_count;
+ enum aggregation_mode aggr_mode;
+ u32 aggr_on_count;
+ u32 aggr_on_da_count;
struct aggr_item *aggr_on;
const struct f_line *merge_by;
+ int logging;
};
struct aggregator_route {
struct aggregator_bucket {
struct aggregator_bucket *next_hash;
- struct rte *rte; /* Pointer to struct aggregator_route.rte */
- struct rte_src *last_src; /* Which src we announced the bucket last with */
+ struct rte *rte; /* Pointer to struct aggregator_route.rte */
+ struct rte_src *last_src; /* Which src we announced the bucket last with */
u32 count;
u32 hash;
+ u32 id;
struct f_val aggr_data[0];
};
+struct rte_withdrawal_item {
+ struct rte_withdrawal_item *next;
+ struct aggregator_bucket *bucket;
+ struct net_addr addr;
+};
+
struct aggregator_proto {
struct proto p;
struct channel *src, *dst;
+ enum aggregation_mode aggr_mode;
/* Buckets by aggregator rule */
HASH(struct aggregator_bucket) buckets;
- slab *bucket_slab;
+ struct linpool *bucket_pool;
/* Routes by net and src */
HASH(struct aggregator_route) routes;
- slab *route_slab;
+ struct linpool *route_pool;
/* Aggregator rule */
- uint aggr_on_count;
- uint aggr_on_da_count;
+ u32 aggr_on_count;
+ u32 aggr_on_da_count;
struct aggr_item *aggr_on;
/* Merge filter */
const struct f_line *merge_by;
event reload_buckets;
+
+ /* Aggregation trie */
+ u32 addr_type;
+ struct trie_node *root;
+ struct slab *trie_slab;
+ int initial_feed;
+ int logging;
+
+ /* Array of bucket pointers */
+ struct aggregator_bucket **bucket_list;
+ size_t bucket_list_size;
+ size_t bucket_list_count;
+
+ /* Bucket IDs */
+ struct hmap bucket_id_map;
+
+ /* Route withdrawal */
+ struct rte_withdrawal_item *rte_withdrawal_stack;
+ struct linpool *rte_withdrawal_pool;
+ int rte_withdrawal_count;
};
enum aggr_item_type {
struct aggr_item i;
};
+enum fib_status {
+ UNASSIGNED_FIB,
+ IN_FIB,
+ NON_FIB,
+};
+
+enum prefix_origin {
+ FILLER,
+ ORIGINAL,
+ AGGREGATED,
+};
+
+struct trie_node {
+ struct trie_node *parent;
+ struct trie_node *child[2];
+ struct trie_node *ancestor;
+ struct aggregator_bucket *original_bucket;
+ struct aggregator_bucket *selected_bucket;
+ enum fib_status status;
+ enum prefix_origin px_origin;
+ u32 potential_buckets[POTENTIAL_BUCKETS_BITMAP_SIZE];
+ int potential_buckets_count;
+ int depth;
+};
+
+void aggregator_aggregate(struct aggregator_proto *p);
+void aggregator_recompute(struct aggregator_proto *p, struct aggregator_route *old, struct aggregator_route *new);
+void aggregator_bucket_update(struct aggregator_proto *p, struct aggregator_bucket *bucket, struct network *net);
+
+struct trie_node *aggregator_root_init(struct aggregator_bucket *bucket, struct slab *trie_slab);
+
#endif
--- /dev/null
+
+debug protocols all;
+debug channels all;
+
+log "bird.log" all;
+
+ipv4 table aggregated;
+
+protocol device {}
+#router id 1;
+
+attribute int distinguisher;
+
+protocol static {
+ ipv4 {
+ import filter { distinguisher = 9; accept; };
+ };
+
+ route 10.100.0.0/16 unreachable;
+}
+
+protocol aggregator {
+ table master4;
+ peer table aggregated;
+ export filter { print net; accept; };
+ aggregate on distinguisher, net;
+ merge by { accept; };
  # default route unreachable;
+}
+
CF_DECLS
-CF_KEYWORDS(AGGREGATOR, PEER, AGGREGATE, ON, MERGE, BY)
+CF_KEYWORDS(AGGREGATOR, PEER, AGGREGATE, ON, MERGE, BY, RELOAD, AFTER, LOG, ALL)
%type <ai> aggr_item aggr_list
this_channel = AGGREGATOR_CFG->src = channel_config_new(NULL, "source", 0, this_proto);
AGGREGATOR_CFG->dst = channel_config_new(NULL, "destination", 0, this_proto);
- AGGREGATOR_CFG->src->ra_mode = AGGREGATOR_CFG->dst->ra_mode = RA_ANY;
+ /*
+ * Aggregation mode is set to prefix aggregation by default, in which case we want to receive
+ * updates with the best routes.
+ */
+ AGGREGATOR_CFG->aggr_mode = PREFIX_AGGR;
+ AGGREGATOR_CFG->src->ra_mode = RA_OPTIMAL;
+ AGGREGATOR_CFG->dst->ra_mode = RA_ANY;
+ AGGREGATOR_CFG->logging = 0;
};
aggregator_proto_item:
if (AGGREGATOR_CFG->aggr_on)
cf_error("Only one aggregate on clause allowed");
- bool net_present = 0;
int count = 0;
for (const struct aggr_item_node *item = $3; item; item = item->next) {
-// log(L_WARN "type %d sacode %d", item->i.type, item->i.sa.sa_code);
- if (item->i.type == AGGR_ITEM_STATIC_ATTR && item->i.sa.sa_code == SA_NET)
- net_present = 1;
+ /*
+ * If NET attribute is present, aggregate routes within the same net
+ * and receive updates with any routes.
+ */
+ if (item->i.type == AGGR_ITEM_STATIC_ATTR && item->i.sa.sa_code == SA_NET) {
+ AGGREGATOR_CFG->aggr_mode = NET_AGGR;
+ AGGREGATOR_CFG->src->ra_mode = RA_ANY;
+ }
count++;
}
- if (!net_present)
- cf_error("'NET' must be present");
-
AGGREGATOR_CFG->aggr_on = cfg_alloc(sizeof(struct aggr_item) * count);
int pos = 0;
+
for (const struct aggr_item_node *item = $3; item; item = item->next) {
if (item->i.type == AGGR_ITEM_DYNAMIC_ATTR)
AGGREGATOR_CFG->aggr_on_da_count++;
$4->args++;
AGGREGATOR_CFG->merge_by = $4;
}
+ | LOG ALL { AGGREGATOR_CFG->logging = 1; }
;
aggregator_proto_opts: /* empty */ | aggregator_proto_opts aggregator_proto_item ';' ;
-aggregator_proto: aggregator_proto_start proto_name '{' aggregator_proto_opts '}' ;
+aggregator_proto: aggregator_proto_start proto_name '{' aggregator_proto_opts '}' {
+ if (AGGREGATOR_CFG->src->table->addr_type != AGGREGATOR_CFG->dst->table->addr_type)
+ cf_error("Both rtables in aggregator must have the same network type");
+ if (PREFIX_AGGR == AGGREGATOR_CFG->aggr_mode)
+ if (AGGREGATOR_CFG->src->table->addr_type != NET_IP4 && AGGREGATOR_CFG->src->table->addr_type != NET_IP6)
+ cf_error("Trie aggregation is available only for IP4 or IPv6 networks");
+};
aggr_list:
aggr_item
--- /dev/null
#!/bin/bash

# Build BIRD and populate a standalone test directory with the bird/birdc
# binaries and the aggregator test configuration.
# Fixes: abort on any failed command (set -e), check cd results (SC2164),
# and test for a directory (-d) rather than mere existence (-e) so a stray
# file with the same name is caught by mkdir.

set -euo pipefail

dir_name="aggregator-test-case"

cd ../.. || exit 1
make all
echo '------------------------'

if [[ ! -d "$dir_name" ]]; then
  echo "creating directory 'bird/$dir_name'"
  mkdir "$dir_name"
fi

echo 'copying bird executable'
cp ./bird "./$dir_name"

echo 'copying birdc executable'
cp ./birdc "./$dir_name"

echo 'copying bird.conf'
cp ./proto/aggregator/bird.conf "./$dir_name"

cd "$dir_name" || exit 1

echo 'done'
echo
echo 'expected result:'
echo '====THIRD PASS===='
echo '0.0.0.0/0'
echo '10.100.0.0/16'
--- /dev/null
+/*
+ * BIRD Internet Routing Daemon -- Prefix aggregation
+ *
+ * (c) 2023--2025 Igor Putovny <igor.putovny@nic.cz>
+ * (c) 2025 CZ.NIC, z.s.p.o.
+ *
+ * Can be freely distributed and used under the terms of the GNU GPL.
+ */
+
+/**
+ * DOC: Aggregator protocol trie
+ *
+ * Prefix aggregation implements the ORTC (Optimal Route Table Construction)
+ * algorithm [1].
 * (Sources and further literature are listed in the References section below.)
+ *
+ * This algorithm uses a binary tree representation of the routing table.
+ * An edge from the parent node to its left child represents bit 0, and
+ * an edge from the parent node to its right child represents bit 1 as the
+ * prefix is traversed from the most to the least significant bit. Last node
+ * of every prefix contains original bucket where the route for this prefix
+ * belongs.
+ *
+ * Prefixes are therefore represented as a path through the trie, beginning at
+ * the root node. The last node on this path is called prefix node.
+ *
 * TODO: clearly distinguish the description of the original algorithm from what this implementation actually does
+ *
+ * ORTC algorithm as described in the original paper consists of three passes
+ * through the trie.
+ *
+ * The first pass adds new nodes to the trie so that every node has either two
+ * or zero children. During this pass, routing information is propagated to the
+ * leaves.
+ *
+ * The second pass finds the most prevalent buckets by pushing information from
+ * the leaves up towards the root. Each node is assigned a set of potential
+ * buckets. If there are any common buckets among the node's children, they
+ * are carried to the parent node. Otherwise, all of children's buckets are
+ * carried to the parent node.
+ *
+ * The third pass moves down the trie while deciding which prefixes will be
+ * exported to the FIB. The node inherits a bucket from its closest ancestor
+ * that has a bucket. If the inherited bucket is one of potential buckets of
+ * this node, then this node does not need a bucket and its prefix will not
+ * be in FIB. Otherwise the node does need a bucket and any of its potential
+ * buckets can be chosen. We always choose the bucket with the lowest ID.
+ * This prefix will go to the FIB.
+ *
+ * Algorithm works with the assumption that there is a default route.
+ *
+ * The following is a description of this implementation.
+ *
+ * The trie contains three different kinds of nodes: original, aggregated and
+ * fillers. Original nodes represent prefixes from the original (import)
+ * routing table. Aggregated nodes represent prefixes that do not exist in the
+ * original table but exist in the aggregated (export) table, as they are result
+ * of the aggregation. Filler nodes are neither, they exist in the trie but do
+ * not represent any prefixes in original or aggregated table.
+ *
+ * Each node has a FIB status flag signalling whether this prefix was exported
+ * to the FIB (IN_FIB) or not (NON_FIB). It is clear that IN_FIB nodes can be
+ * either original or aggregated, whereas NON_FIB nodes can be either original
+ * or fillers.
+ *
+ * Every node contains pointer to its closest IN_FIB ancestor.
+ *
+ * After every aggregation, following invariants are always satisfied:
+ * 1. No original bucket can be null.
+ * 2. No ancestor pointer can be null.
+ * 3. If a node is IN_FIB, then
+ * a) its selected bucket must not be null,
+ * b) its ancestor pointer must point to itself,
+ * c) its origin must be ORIGINAL or AGGREGATED.
+ * 4. If a node is NON_FIB, then
+ * a) its selected bucket must be null,
+ * b) its ancestor pointer must point to the nearest IN_FIB ancestor,
+ * c) its origin must be ORIGINAL or FILLER.
+ *
+ * Our implementation differs from the algorithm as described in the original
+ * paper in several aspects. First, we do not normalize the trie by adding new
+ * nodes so that every node has either zero or two children. Second, propagation
+ * of original buckets, which was formerly done during first pass, is now done
+ * in the second pass. First pass is completely omitted.
+ * The two phases of aggregation are named propagate_and_merge() for first and
+ * second pass and group_prefixes() for third pass.
+ *
+ * Aggregator is capable of processing incremental updates. After receiving
+ * an update, which can be either announce or withdraw, corresponding node
+ * is found in the trie and its original bucket is updated. Trie now needs to
+ * be recomputed to reflect this update.
+ * Trie is traversed from the updated node upwards until its closest IN_FIB
+ * ancestor is found. This is the prefix node that covers an address space which
+ * is affected by received update. This is followed by propagate_and_merge(),
+ * which propagates potential buckets from the leaves upwards. Merging of sets
+ * of potential buckets continues upwards until the node's set is not changed by
+ * this operation. Finally, the third pass runs from this node, finishing the
+ * aggregation. During the third pass, changes in prefix FIB status are detected
+ * and routes are exported or removed from the routing table accordingly. All
 * new routes are exported immediately, whereas routes that are to be
 * withdrawn are pushed on the stack and removed after recomputing the trie.
+ *
+ * References:
+ * [1] R. P. Draves, C. King, S. Venkatachary and B. D. Zill. Constructing
+ * Optimal IP Routing Tables. In Proceedings of IEEE INFOCOM, volume 1,
+ * pages 88-97, 1999.
+ * [2] Z. A. Uzmi, M. Nebel, A. Tariq, S. Jawad, R. Chen, A. Shaikh, J. Wang,
+ * P. Francis. Practical and Near-Optimal FIB Aggregation using SMALTA.
+ * In Proceedings of CoNEXT, 2011.
+ * [3] Y. Liu, B. Zhang, L. Wang. FIFA: Fast Incremental FIB Aggregation.
+ * In Proceedings of IEEE INFOCOM, 2013.
+ * [4] Y. Liu, X. Zhao, K. Nam, L. Wang, B. Zhang. Incremental Forwarding
+ * Table Aggregation. In Proceedings of IEEE GLOBECOM, 2010.
+ * [5] X. Zhao, Y. Liu, L. Wang, B. Zhang. On the Aggregatability of Router
+ * Forwarding Tables. In Proceedings of IEEE INFOCOM, 2010.
+ *
+ */
+
+#undef LOCAL_DEBUG
+
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+
+#include "nest/bird.h"
+#include "filter/filter.h"
+#include "proto/aggregator/aggregator.h"
+
+#include <stdbool.h>
+
/*
 * Human-readable names for enum prefix_origin values, used when dumping
 * the contents of the trie. Nodes are either original (representing input
 * prefixes), aggregated (prefixes created by aggregation), or fillers,
 * which are neither.
 */
static const char *px_origin_str[] = {
  [FILLER] = "filler",
  [ORIGINAL] = "original",
  [AGGREGATED] = "aggregated",
};
+
/*
 * We use ip6_addr (under its alias ip_addr) to hold both IPv4 and IPv6
 * addresses. When indexing individual bits of these addresses, an offset
 * of 96 must be added for IPv4, because an IPv4 address is stored in the
 * lowest 32 bits of ip6_addr, whereas IPv6 occupies all 128 bits.
 */
static const u32 ipa_shift[] = {
  [NET_IP4] = IP6_MAX_PREFIX_LENGTH - IP4_MAX_PREFIX_LENGTH,
  [NET_IP6] = 0,
};
+
+/*
+ * Allocate and initialize root node
+ */
+struct trie_node *
+aggregator_root_init(struct aggregator_bucket *bucket, struct slab *trie_slab)
+{
+ struct trie_node *root = sl_allocz(trie_slab);
+
+ *root = (struct trie_node) {
+ .original_bucket = bucket,
+ .status = NON_FIB,
+ .px_origin = ORIGINAL,
+ .depth = 0,
+ };
+
+ return root;
+}
+
+static inline int
+aggregator_is_leaf(const struct trie_node *node)
+{
+ ASSERT_DIE(node != NULL);
+ return !node->child[0] && !node->child[1];
+}
+
+/*
+ * Unlink node from the trie by setting appropriate child of parent node to NULL
+ * and free memory.
+ */
+static inline void
+aggregator_remove_node(struct trie_node *node)
+{
+ ASSERT_DIE(node != NULL);
+ ASSERT_DIE(node->child[0] == NULL && node->child[1] == NULL);
+
+ if (!node->parent)
+ ;
+ else
+ {
+ if (node->parent->child[0] == node)
+ {
+ node->parent->child[0] = NULL;
+ ASSERT_DIE(node->parent->child[1] != node);
+ }
+ else if (node->parent->child[1] == node)
+ {
+ node->parent->child[1] = NULL;
+ ASSERT_DIE(node->parent->child[0] != node);
+ }
+ else
+ bug("Corrupted memory (node is not its parent's child)");
+ }
+
+ sl_free(node);
+}
+
+/*
+ * Insert @bucket to the set of potential buckets in @node
+ */
+static inline void
+aggregator_node_add_potential_bucket(struct trie_node *node, const struct aggregator_bucket *bucket)
+{
+ ASSERT_DIE(node->potential_buckets_count < MAX_POTENTIAL_BUCKETS_COUNT);
+
+ if (BIT32R_TEST(node->potential_buckets, bucket->id))
+ return;
+
+ BIT32R_SET(node->potential_buckets, bucket->id);
+ node->potential_buckets_count++;
+}
+
/*
 * Check whether the bucket with ID @id is one of the potential buckets
 * of @node.
 */
static inline int
aggregator_is_bucket_potential(const struct trie_node *node, u32 id)
{
  /* TODO(review): callers mix passing bucket pointers and bare IDs around
   * these helpers -- consider passing just the bucket ID everywhere */

  ASSERT_DIE(node != NULL);

  ASSERT_DIE(id < MAX_POTENTIAL_BUCKETS_COUNT);
  return BIT32R_TEST(node->potential_buckets, id);
}
+
+/*
+ * Return pointer to bucket with ID @id.
+ * Protocol contains list of pointers to all buckets. Every pointer
+ * lies at position equal to bucket ID to enable fast lookup.
+ */
+static inline struct aggregator_bucket *
+aggregator_get_bucket_from_id(const struct aggregator_proto *p, u32 id)
+{
+ ASSERT_DIE(id < p->bucket_list_size);
+ ASSERT_DIE(p->bucket_list[id] != NULL);
+ ASSERT_DIE(p->bucket_list[id]->id == id);
+ return p->bucket_list[id];
+}
+
+/*
+ * Select bucket with the lowest ID from the set of node's potential buckets
+ */
+static inline struct aggregator_bucket *
+aggregator_select_lowest_id_bucket(const struct aggregator_proto *p, const struct trie_node *node)
+{
+ ASSERT_DIE(p != NULL);
+ ASSERT_DIE(node != NULL);
+
+ for (int i = 0; i < POTENTIAL_BUCKETS_BITMAP_SIZE; i++)
+ {
+ if (node->potential_buckets[i] == 0)
+ continue;
+
+ /*
+ * Use CLZ -- Count Leading Zeroes to find first set bit.
+ * Compute its position from the beginning of the array.
+ */
+ u32 id = u32_clz(node->potential_buckets[i]) + i * 32;
+
+ struct aggregator_bucket *bucket = aggregator_get_bucket_from_id(p, id);
+ ASSERT_DIE(bucket != NULL);
+ ASSERT_DIE(bucket->id == id);
+
+ return bucket;
+ }
+
+ bug("No potential buckets to choose from");
+}
+
+/*
+ * @target: node we are computing set of potential buckets for
+ * @left, @right: left and right children of @target
+ *
+ * The resulting set is an intersection of sets of @left and @right. If this
+ * intersection is empty, resulting set is an union of @left and @right sets.
+ *
+ * Returns: whether the set of potential buckets in the target node has changed.
+ */
+static bool
+aggregator_merge_potential_buckets(struct trie_node *target, const struct trie_node *left, const struct trie_node *right)
+{
+ ASSERT_DIE(target != NULL);
+ ASSERT_DIE(left != NULL);
+ ASSERT_DIE(right != NULL);
+
+ bool has_intersection = false;
+ bool has_changed = false;
+
+ u32 before[ARRAY_SIZE(target->potential_buckets)] = { 0 };
+
+ target->potential_buckets_count = 0;
+
+ /* First we try to compute intersection. If it exists, we want to keep it. */
+ for (int i = 0; i < POTENTIAL_BUCKETS_BITMAP_SIZE; i++)
+ {
+ /* Save current bitmap values */
+ before[i] = target->potential_buckets[i];
+
+ /* Compute intersection */
+ target->potential_buckets[i] = left->potential_buckets[i] & right->potential_buckets[i];
+ target->potential_buckets_count += u32_popcount(target->potential_buckets[i]);
+
+ if (target->potential_buckets[i] != 0)
+ has_intersection = true;
+
+ if (before[i] != target->potential_buckets[i])
+ has_changed = true;
+ }
+
+ /* Intersection found */
+ if (has_intersection)
+ return has_changed;
+
+ /* Sets have an empty intersection, compute their union instead */
+ target->potential_buckets_count = 0;
+ has_changed = false;
+
+ for (int i = 0; i < POTENTIAL_BUCKETS_BITMAP_SIZE; i++)
+ {
+ target->potential_buckets[i] = left->potential_buckets[i] | right->potential_buckets[i];
+ target->potential_buckets_count += u32_popcount(target->potential_buckets[i]);
+
+ if (before[i] != target->potential_buckets[i])
+ has_changed = true;
+ }
+
+ return has_changed;
+}
+
/*
 * Recursively dump the subtrie rooted at @node into the log, one node per
 * line. @prefix and @pxlen describe the address of @node; bits of @prefix
 * are set and cleared in place while descending. @buf is a scratch log
 * buffer reused for every line.
 */
static void
aggregator_dump_trie_helper(const struct aggregator_proto *p, const struct trie_node *node, ip_addr *prefix, u32 pxlen, struct buffer *buf)
{
  ASSERT_DIE(p != NULL);
  ASSERT_DIE(node != NULL);
  ASSERT_DIE(prefix != NULL);

  /* Reset the scratch buffer before formatting this node's line */
  memset(buf->start, 0, buf->pos - buf->start);
  buf->pos = buf->start;

  struct net_addr addr = { 0 };
  net_fill_ipa(&addr, *prefix, pxlen);

  /* Indent by depth; nodes exported to FIB are marked with '@' */
  buffer_print(buf, "%*s%s%N ", 2 * node->depth, "", (node->status == IN_FIB) ? "@" : " ", &addr);

  /* Original bucket ID in single brackets; empty brackets for fillers */
  if (node->original_bucket)
    buffer_print(buf, "[%u] ", node->original_bucket->id);
  else
    buffer_print(buf, "[] ");

  buffer_print(buf, "{");

  /* List the IDs of all potential buckets by walking set bits of the bitmap */
  for (int i = 0, j = 0; i < POTENTIAL_BUCKETS_BITMAP_SIZE; i++)
  {
    if (node->potential_buckets[i] == 0)
      continue;

    u32 item = node->potential_buckets[i];

    while (item != 0)
    {
      /* Find first set bit (CLZ -- Count Leading Zeroes) */
      int bitpos = u32_clz(item);

      /* Compute ID as offset from the beginning of array */
      u32 id = i * 32 + (u32)bitpos;

      buffer_print(buf, "%u", id);
      j++;

      /* Comma-separate all but the last ID */
      if (j < node->potential_buckets_count)
        buffer_print(buf, ", ");

      /* Clear first set bit and continue */
      u32 mask = 1U << (32 - bitpos - 1);
      item &= ~mask;
    }
  }

  buffer_print(buf, "}");

  /* Selected (exported) bucket ID in double brackets, if any */
  if (node->selected_bucket)
    buffer_print(buf, " -> [[%u]]", node->selected_bucket->id);

  buffer_print(buf, " %p %s", node, px_origin_str[node->px_origin]);
  log("%s", buf->start);

  /* Recurse into children, extending the prefix by one bit (0 left, 1 right) */
  if (node->child[0])
  {
    ASSERT_DIE((u32)node->depth == pxlen);
    ip6_clrbit(prefix, node->depth + ipa_shift[p->addr_type]);
    aggregator_dump_trie_helper(p, node->child[0], prefix, pxlen + 1, buf);
  }

  if (node->child[1])
  {
    ASSERT_DIE((u32)node->depth == pxlen);
    ip6_setbit(prefix, node->depth + ipa_shift[p->addr_type]);
    aggregator_dump_trie_helper(p, node->child[1], prefix, pxlen + 1, buf);
    ip6_clrbit(prefix, node->depth + ipa_shift[p->addr_type]);
  }
}
+
+static void
+aggregator_dump_trie(const struct aggregator_proto *p)
+{
+ ip_addr prefix = (p->addr_type == NET_IP4) ? ipa_from_ip4(IP4_NONE) : ipa_from_ip6(IP6_NONE);
+
+ struct buffer buf = { 0 };
+ LOG_BUFFER_INIT(buf);
+
+ log("==== TRIE BEGIN ====");
+ aggregator_dump_trie_helper(p, p->root, &prefix, 0, &buf);
+ log("==== TRIE END ====");
+}
+
/*
 * Create and export a route for @prefix/@pxlen originating in @bucket.
 * NOTE(review): allocz() appears to allocate the temporary network
 * structure with automatic lifetime -- confirm aggregator_bucket_update()
 * does not retain the pointer past this call.
 */
static inline void
aggregator_create_route(struct aggregator_proto *p, ip_addr prefix, u32 pxlen, struct aggregator_bucket *bucket)
{
  struct net_addr addr = { 0 };
  net_fill_ipa(&addr, prefix, pxlen);

  /* Temporary network structure carrying the address to the exporter */
  struct network *n = allocz(sizeof(*n) + sizeof(addr));
  net_copy(n->n.addr, &addr);

  aggregator_bucket_update(p, bucket, n);
}
+
+/*
+ * Prepare to withdraw route for @prefix
+ */
+static void
+aggregator_prepare_rte_withdrawal(struct aggregator_proto *p, ip_addr prefix, u32 pxlen, struct aggregator_bucket *bucket)
+{
+ ASSERT_DIE(p != NULL);
+ ASSERT_DIE(bucket != NULL);
+
+ /* Allocate the item */
+ struct rte_withdrawal_item *item = lp_allocz(p->rte_withdrawal_pool, sizeof(*item));
+
+ /* Fill in net and bucket */
+ struct net_addr addr = { 0 };
+ net_fill_ipa(&addr, prefix, pxlen);
+ net_copy(&item->addr, &addr);
+
+ item->bucket = bucket;
+
+ /* Push item onto stack */
+ item->next = p->rte_withdrawal_stack,
+ p->rte_withdrawal_stack = item;
+ p->rte_withdrawal_count++;
+}
+
+/*
+ * Insert @prefix to the trie and assign @bucket to this prefix. If the prefix
+ * is already in the trie, update its bucket to @bucket and return updated node.
+ */
+static struct trie_node *
+aggregator_trie_insert_prefix(struct aggregator_proto *p, ip_addr prefix, u32 pxlen, struct aggregator_bucket *bucket)
+{
+ ASSERT_DIE(p != NULL);
+ ASSERT_DIE(bucket != NULL);
+
+ struct trie_node *node = p->root;
+
+ for (u32 i = 0; i < pxlen; i++)
+ {
+ u32 bit = ip6_getbit(prefix, i + ipa_shift[p->addr_type]);
+
+ /* Add filler nodes onto the path to the actual prefix node */
+ if (!node->child[bit])
+ {
+ struct trie_node *new = sl_allocz(p->trie_slab);
+
+ *new = (struct trie_node) {
+ .parent = node,
+ .status = NON_FIB,
+ .px_origin = FILLER,
+ .depth = node->depth + 1,
+ };
+
+ node->child[bit] = new;
+ }
+
+ node = node->child[bit];
+ }
+
+ /* Assign bucket to the last node */
+ node->original_bucket = bucket;
+ node->px_origin = ORIGINAL;
+
+ return node;
+}
+
/*
 * Remove @prefix from the trie and return the last affected node.
 * The node is only downgraded to a filler here, not deleted -- see the
 * comment below for why upward cleanup must wait.
 */
static struct trie_node *
aggregator_trie_remove_prefix(struct aggregator_proto *p, ip_addr prefix, u32 pxlen)
{
  struct trie_node *node = p->root;

  /* Walk down the prefix path; every node on it must already exist */
  for (u32 i = 0; i < pxlen; i++)
  {
    u32 bit = ip6_getbit(prefix, i + ipa_shift[p->addr_type]);
    node = node->child[bit];
    ASSERT_DIE(node != NULL);
  }

  ASSERT_DIE(node->px_origin == ORIGINAL);
  ASSERT_DIE((u32)node->depth == pxlen);

  /*
   * Even though this function is called to remove prefix from the trie, we
   * can only change its origin from original to filler. Node itself cannot be
   * removed just yet. If it was removed, we would lose information about the
   * input data which are used by the algorithm. This information is essential
   * for correctly recomputing the trie. If the algorithm decides the node is
   * no longer needed, it will be removed later.
   */
  node->px_origin = FILLER;
  node->ancestor = NULL;
  node->original_bucket = NULL;
  node->potential_buckets_count = 0;
  memset(node->potential_buckets, 0, sizeof(node->potential_buckets));

  return node;
}
+
/*
 * Reconstruct the prefix corresponding to the position of @target in the
 * trie; the result is written into @prefix (bits set/cleared in place) and
 * @pxlen. @type is the network type (NET_IP4/NET_IP6) selecting the bit
 * offset into the ip6_addr representation.
 */
static void
aggregator_find_subtree_prefix(const struct trie_node *target, ip_addr *prefix, u32 *pxlen, u32 type)
{
  ASSERT_DIE(target != NULL);
  ASSERT_DIE(prefix != NULL);
  ASSERT_DIE(pxlen != NULL);

  /* Bits of the path recorded bottom-up, then replayed top-down */
  int path[IP6_MAX_PREFIX_LENGTH] = { 0 };
  int pos = 0;
  u32 len = 0;

  const struct trie_node *node = target;
  const struct trie_node *parent = node->parent;

  /* Ascend to the root node, recording which child each node was */
  while (parent)
  {
    if (node == node->parent->child[0])
      path[pos++] = 0;
    else if (node == node->parent->child[1])
      path[pos++] = 1;
    else
      bug("Corrupted memory (node is not its parent's child)");

    ASSERT_DIE(pos < IP6_MAX_PREFIX_LENGTH);
    node = parent;
    parent = node->parent;
  }

  ASSERT_DIE(node->parent == NULL);

  /* Descend back to the target node, applying the recorded bits in reverse */
  for (int i = pos - 1; i >= 0; i--)
  {
    if (path[i] == 0)
      ip6_clrbit(prefix, node->depth + ipa_shift[type]);
    else if (path[i] == 1)
      ip6_setbit(prefix, node->depth + ipa_shift[type]);

    ASSERT_DIE(node->child[path[i]] != NULL);
    node = node->child[path[i]];

    len++;
    ASSERT_DIE((u32)node->depth == len);
  }

  ASSERT_DIE(node == target);
  *pxlen = len;
}
+
/*
 * First and second pass of Optimal Route Table Construction (ORTC) algorithm
 *
 * This function performs two tasks. First, it propagates original buckets from
 * target node to the leaves. Original bucket from prefix node is assigned to
 * all his descendants in a downward direction until another original node is
 * reached. Second, it merges sets of potential buckets from leaves upward to
 * the target node.
 *
 * TODO(review): add more commentary and consider renaming
 * (aggregator_propagate_update?)
 */
static void
aggregator_propagate_and_merge(struct trie_node *node)
{
  ASSERT_DIE(node != NULL);
  ASSERT_DIE(node->status != UNASSIGNED_FIB);
  ASSERT_DIE(node->potential_buckets_count <= MAX_POTENTIAL_BUCKETS_COUNT);

  /* Propagate original buckets from original nodes to their descendants */
  if (node->px_origin != ORIGINAL)
  {
    node->original_bucket = node->parent->original_bucket;

    /*
     * During initial aggregation, there are only original and filler nodes,
     * thus this statement has no effect. When recomputing, aggregated nodes
     * become fillers.
     */
    node->px_origin = FILLER;
  }

  ASSERT_DIE(node->original_bucket != NULL);

  if (aggregator_is_leaf(node))
  {
    /*
     * When running aggregation for the first time, erasing sets is not
     * necessary, because they are empty. However, when recomputing, sets
     * of the leaf nodes must be cleared. Sets in internal nodes don't have
     * to, because they will be overwritten by merging operation.
     */
    node->potential_buckets_count = 0;
    memset(node->potential_buckets, 0, sizeof(node->potential_buckets));

    ASSERT_DIE(node->potential_buckets_count == 0);

    /* Original bucket of leaf nodes is their potential bucket */
    aggregator_node_add_potential_bucket(node, node->original_bucket);
    return;
  }

  struct trie_node *left = node->child[0];
  struct trie_node *right = node->child[1];

  /* Postorder traversal */
  if (left)
    aggregator_propagate_and_merge(left);

  if (right)
    aggregator_propagate_and_merge(right);

  /*
   * Merging sets of potential buckets obviously require node's two children as
   * arguments. Since our implementation doesn't normalize the trie and therefore
   * some nodes may have only one child, we simulate missing node by creating
   * temporary node on stack and using it as an argument for merging.
   */
  struct trie_node imaginary_node = { 0 };

  /* Imaginary node inherits potential bucket from its parent */
  aggregator_node_add_potential_bucket(&imaginary_node, node->original_bucket);

  /* Nodes with only one child */
  if (left && !right)
    right = &imaginary_node;
  else if (!left && right)
    left = &imaginary_node;

  ASSERT_DIE(left != NULL && right != NULL);

  /*
   * If there are no common buckets among children's buckets, parent's
   * buckets are computed as union of its children's buckets.
   * Otherwise, parent's buckets are computed as intersection of its
   * children's buckets.
   */
  aggregator_merge_potential_buckets(node, left, right);
}
+
/*
 * @node: a node with exactly one child
 * @inherited_bucket: selected bucket of the closest ancestor of the target
 * node which is in FIB and thus has a non-null bucket
 * @trie_slab: allocator for newly created nodes
 *
 * Process nodes that have only one child during grouping of prefixes and add
 * new nodes if necessary.
 *
 * Because our implementation doesn't normalize the trie (by adding new nodes
 * so that every node has either two or zero children) during first stage of
 * aggregation, we need to decide if these missing nodes are indeed needed in
 * the trie.
 */
static void
aggregator_process_one_child_nodes(struct trie_node *node, const struct aggregator_bucket *inherited_bucket, struct slab *trie_slab)
{
  ASSERT_DIE(node != NULL);

  /* Imaginary node that would have been added during normalization of the trie */
  struct trie_node imaginary_node = {
    .parent = node,
    .original_bucket = node->original_bucket,
    .status = NON_FIB,
    .px_origin = AGGREGATED,
    .depth = node->depth + 1,
  };

  /* Imaginary node inherits bucket from its parent - current node */
  aggregator_node_add_potential_bucket(&imaginary_node, node->original_bucket);

  /*
   * If the current node (parent of the imaginary node) has a bucket, then
   * the imaginary node inherits this bucket. Otherwise it inherits bucket
   * from the closest ancestor which is IN_FIB and thus has a non-null bucket.
   */
  const struct aggregator_bucket * const imaginary_node_inherited_bucket = (node->status == IN_FIB)
    ? node->selected_bucket
    : inherited_bucket;

  ASSERT_DIE(imaginary_node_inherited_bucket != NULL);

  /*
   * Since this implementation doesn't normalize the trie during first stage
   * of aggregation, we need to know if these nodes are needed in the trie.
   * These nodes are simulated by @imaginary_node. If the bucket that imaginary
   * node inherits from its IN_FIB ancestor is NOT one of its potential buckets,
   * imaginary node needs to be added to the trie because it's not covered
   * by its ancestor.
   */
  if (!aggregator_is_bucket_potential(&imaginary_node, imaginary_node_inherited_bucket->id))
  {
    /* Allocate new node and copy imaginary node into it */
    struct trie_node *new = sl_allocz(trie_slab);
    *new = imaginary_node;

    const struct trie_node * const left = node->child[0];
    const struct trie_node * const right = node->child[1];

    /* Connect new node to the trie on the vacant side */
    if (left && !right)
      node->child[1] = new;
    else
      node->child[0] = new;
  }
}
+
+/*
+ * Export prefix of the current node to FIB and mark node as IN_FIB
+ */
+static void
+aggregator_export_node_prefix(struct aggregator_proto *p, struct trie_node *node, ip_addr prefix, u32 pxlen)
+{
+ ASSERT_DIE(node->potential_buckets_count > 0);
+
+ /* Save old bucket before assigning new */
+ struct aggregator_bucket * const old_bucket = node->selected_bucket;
+
+ /* Select bucket with the lowest ID */
+ node->selected_bucket = aggregator_select_lowest_id_bucket(p, node);
+ ASSERT_DIE(node->selected_bucket != NULL);
+
+ /* Node status is changing from NON_FIB to IN_FIB, export its route */
+ if (node->status != IN_FIB)
+ {
+ aggregator_create_route(p, prefix, pxlen, node->selected_bucket);
+ }
+ else /* Prefix is already in FIB */
+ {
+ ASSERT_DIE(old_bucket != NULL);
+
+ /* Node's bucket has changed, remove old route */
+ if (old_bucket && old_bucket != node->selected_bucket)
+ {
+ aggregator_prepare_rte_withdrawal(p, prefix, pxlen, old_bucket);
+ aggregator_create_route(p, prefix, pxlen, node->selected_bucket);
+ }
+ }
+
+ node->status = IN_FIB;
+ node->ancestor = node;
+
+ /* Original prefix stays original, otherwise it becomes aggregated */
+ node->px_origin = (node->px_origin == ORIGINAL) ? ORIGINAL : AGGREGATED;
+}
+
/*
 * Remove prefix of the current node from FIB and mark node as NON_FIB.
 * If the node was exported, its route is queued for withdrawal.
 * Must not be called on the root (the parent pointer is dereferenced).
 */
static void
aggregator_remove_node_prefix(struct aggregator_proto *p, struct trie_node *node, ip_addr prefix, u32 pxlen)
{
  /* Node status is changing from IN_FIB to NON_FIB, withdraw its route */
  if (node->status == IN_FIB)
  {
    ASSERT_DIE(node->selected_bucket != NULL);
    aggregator_prepare_rte_withdrawal(p, prefix, pxlen, node->selected_bucket);
  }

  node->selected_bucket = NULL;
  node->status = NON_FIB;
  /* Inherit the nearest IN_FIB ancestor from the parent */
  node->ancestor = node->parent->ancestor;

  /*
   * Original prefix stays original, otherwise it was aggregated and becomes
   * a filler
   */
  node->px_origin = (node->px_origin == ORIGINAL) ? ORIGINAL : FILLER;
}
+
/*
 * This function moves from the target node downwards to the leaves and
 * decides which prefixes are the result of the aggregation and will be
 * exported to the FIB.
 * Each node (except root node) is covered by one of its ancestors. We can say
 * that each node "inherits" selected bucket from one of its ancestors. If this
 * inherited bucket is one of the node's potential buckets, then this prefix
 * will not go to the FIB, because its address space is already covered by some
 * shorter prefix. However, if the inherited bucket is not one of the node's
 * potential buckets, then a bucket for this node is chosen from its set and
 * the prefix is exported to the FIB.
 */
static void
aggregator_group_prefixes_helper(struct aggregator_proto *p, struct trie_node *node, ip_addr *prefix, u32 pxlen)
{
  ASSERT_DIE(node != NULL);
  ASSERT_DIE(node->status != UNASSIGNED_FIB);
  ASSERT_DIE(node->potential_buckets_count <= MAX_POTENTIAL_BUCKETS_COUNT);

  ASSERT_DIE(node->original_bucket != NULL);
  ASSERT_DIE(node->parent->ancestor != NULL);
  ASSERT_DIE(node->parent->ancestor->selected_bucket != NULL);

  /* Bucket inherited from the closest ancestor with a non-null selected bucket */
  const struct aggregator_bucket * const inherited_bucket = node->parent->ancestor->selected_bucket;

  /*
   * If the bucket inherited from the ancestor is one of potential buckets
   * of the current node, then this node doesn't need a bucket because it
   * inherits one, and its prefix is thus not needed in FIB.
   */
  if (aggregator_is_bucket_potential(node, inherited_bucket->id))
    aggregator_remove_node_prefix(p, node, *prefix, pxlen);
  else
    aggregator_export_node_prefix(p, node, *prefix, pxlen);

  /* FIB status and selected bucket must agree after the decision above */
  ASSERT_DIE((node->selected_bucket != NULL && node->status == IN_FIB) || (node->selected_bucket == NULL && node->status == NON_FIB));
  ASSERT_DIE(node->ancestor != NULL);
  ASSERT_DIE(node->ancestor->original_bucket != NULL);
  ASSERT_DIE(node->ancestor->selected_bucket != NULL);

  const struct trie_node * const left = node->child[0];
  const struct trie_node * const right = node->child[1];

  /* Process nodes with only one child */
  if ((left && !right) || (!left && right))
    aggregator_process_one_child_nodes(node, inherited_bucket, p->trie_slab);

  /* Preorder traversal */
  if (node->child[0])
  {
    ASSERT_DIE((u32)node->depth == pxlen);
    ip6_clrbit(prefix, node->depth + ipa_shift[p->addr_type]);
    aggregator_group_prefixes_helper(p, node->child[0], prefix, pxlen + 1);
  }

  if (node->child[1])
  {
    ASSERT_DIE((u32)node->depth == pxlen);
    ip6_setbit(prefix, node->depth + ipa_shift[p->addr_type]);
    aggregator_group_prefixes_helper(p, node->child[1], prefix, pxlen + 1);
    ip6_clrbit(prefix, node->depth + ipa_shift[p->addr_type]);
  }

  /* Prune the trie: drop filler leaves that ended up outside the FIB */
  if (node->status == NON_FIB && node->px_origin != ORIGINAL && aggregator_is_leaf(node))
  {
    ASSERT_DIE(node->selected_bucket == NULL);
    aggregator_remove_node(node);
  }
}
+
+/*
+ * Third pass of Optimal Route Table Construction (ORTC) algorithm
+ *
+ * This function represents the final stage of aggregation. It decides which
+ * prefixes will be exported into the FIB. In that case, it selects a bucket
+ * for the target node from the set of its potential buckets and creates a new
+ * route for this prefix. Recursively group prefixes in the subtree rooted
+ * at @node.
+ */
+static void
+aggregator_group_prefixes(struct aggregator_proto *p, struct trie_node *node)
+{
+  ASSERT_DIE(node != NULL);
+  ASSERT_DIE(node->potential_buckets_count <= MAX_POTENTIAL_BUCKETS_COUNT);
+  ASSERT_DIE(node->potential_buckets_count > 0);
+
+  /* Scratch prefix shared by the whole traversal, mutated bit by bit */
+  ip_addr px = (p->addr_type == NET_IP4) ? ipa_from_ip4(IP4_NONE) : ipa_from_ip6(IP6_NONE);
+  u32 plen = 0;
+
+  /*
+   * If this function runs on a subtree and not the whole trie,
+   * find the prefix that covers this subtree.
+   */
+  aggregator_find_subtree_prefix(node, &px, &plen, p->addr_type);
+
+  /* The root of the (sub)trie is always exported */
+  aggregator_export_node_prefix(p, node, px, plen);
+
+  /* Descend into both subtrees, extending the prefix by one bit */
+  for (int i = 0; i < 2; i++)
+  {
+    if (!node->child[i])
+      continue;
+
+    ASSERT_DIE((u32)node->depth == plen);
+
+    if (i)
+      ip6_setbit(&px, node->depth + ipa_shift[p->addr_type]);
+    else
+      ip6_clrbit(&px, node->depth + ipa_shift[p->addr_type]);
+
+    aggregator_group_prefixes_helper(p, node->child[i], &px, plen + 1);
+
+    /* Clear the bit back after the right subtree */
+    if (i)
+      ip6_clrbit(&px, node->depth + ipa_shift[p->addr_type]);
+  }
+}
+
+/*
+ * Check trie consistency and invariants after aggregation.
+ *
+ * An IN_FIB node must carry a selected bucket and be its own ancestor;
+ * a NON_FIB node must carry no selected bucket and share its ancestor
+ * with its parent. Any other status is a bug. Checks the whole subtree
+ * rooted at @node recursively.
+ */
+static void
+check_trie_after_aggregation(const struct trie_node *node)
+{
+  ASSERT_DIE(node != NULL);
+  ASSERT_DIE(node->ancestor != NULL);
+
+  switch (node->status)
+  {
+  case IN_FIB:
+    /* Exported nodes select their own bucket and anchor the ancestor chain */
+    ASSERT_DIE(node->px_origin == ORIGINAL || node->px_origin == AGGREGATED);
+    ASSERT_DIE(node->selected_bucket != NULL);
+    ASSERT_DIE(node->ancestor == node);
+    break;
+
+  case NON_FIB:
+    /* Non-exported nodes inherit the ancestor from their parent */
+    ASSERT_DIE(node->px_origin == ORIGINAL || node->px_origin == FILLER);
+    ASSERT_DIE(node->selected_bucket == NULL);
+    ASSERT_DIE(node->ancestor != node);
+    ASSERT_DIE(node->ancestor == node->parent->ancestor);
+    break;
+
+  default:
+    bug("Unknown node status");
+  }
+
+  if (node->child[0])
+    check_trie_after_aggregation(node->child[0]);
+
+  if (node->child[1])
+    check_trie_after_aggregation(node->child[1]);
+}
+
+/*
+ * Merge sets of potential buckets of node's children going from @node upwards.
+ * Stop when the node's set doesn't change and return the last updated node.
+ */
+static struct trie_node *
+aggregator_merge_buckets_above(struct trie_node *node)
+{
+  ASSERT_DIE(node != NULL);
+
+  for (struct trie_node *up = node->parent; up; node = up, up = up->parent)
+  {
+    const struct trie_node *lc = up->child[0];
+    const struct trie_node *rc = up->child[1];
+    ASSERT_DIE(lc == node || rc == node);
+
+    /* Stand-in child holding just the parent's original bucket */
+    struct trie_node phantom = { 0 };
+    aggregator_node_add_potential_bucket(&phantom, up->original_bucket);
+
+    /* Substitute the phantom for a missing child */
+    if (!rc)
+      rc = &phantom;
+    else if (!lc)
+      lc = &phantom;
+
+    ASSERT_DIE(lc != NULL && rc != NULL);
+
+    /* The parent's set didn't change by merging, stop here */
+    if (!aggregator_merge_potential_buckets(up, lc, rc))
+      return node;
+  }
+
+  return node;
+}
+
+/*
+ * Build the aggregation trie: walk all buckets and insert the prefix of
+ * every route they hold, remembering the bucket it came from.
+ */
+static void
+aggregator_construct_trie(struct aggregator_proto *p)
+{
+  HASH_WALK(p->buckets, next_hash, b)
+  {
+    for (const struct rte *e = b->rte; e; e = e->next)
+    {
+      const struct net_addr *n = e->net->n.addr;
+      aggregator_trie_insert_prefix(p, net_prefix(n), net_pxlen(n), b);
+    }
+  }
+  HASH_WALK_END;
+}
+
+/*
+ * Run Optimal Routing Table Constructor (ORTC) algorithm on the whole trie
+ * and verify the resulting invariants.
+ */
+static void
+aggregator_compute_trie(struct aggregator_proto *p)
+{
+  ASSERT_DIE(p->addr_type == NET_IP4 || p->addr_type == NET_IP6);
+
+  struct trie_node *root = p->root;
+
+  /* Propagate buckets downwards and merge potential sets upwards */
+  aggregator_propagate_and_merge(root);
+
+  /* Select buckets and export the resulting prefixes */
+  aggregator_group_prefixes(p, root);
+
+  check_trie_after_aggregation(root);
+}
+
+/*
+ * Build the trie from all buckets and run the full aggregation on it.
+ */
+void
+aggregator_aggregate(struct aggregator_proto *p)
+{
+  ASSERT_DIE(p->root != NULL);
+
+  aggregator_construct_trie(p);
+  aggregator_compute_trie(p);
+}
+
+/*
+ * Incorporate a single prefix change into the trie and reaggregate
+ * the affected subtree.
+ *
+ * @old: previous route for the prefix, or NULL on a fresh announce
+ * @new: new route for the prefix, or NULL on a withdraw
+ *
+ * At least one of @old and @new must be non-NULL.
+ */
+void
+aggregator_recompute(struct aggregator_proto *p, struct aggregator_route *old, struct aggregator_route *new)
+{
+  struct trie_node *updated_node = NULL;
+
+  if (old && !new)
+  {
+    /* Withdraw */
+    const struct net_addr *addr = old->rte.net->n.addr;
+
+    updated_node = aggregator_trie_remove_prefix(p, net_prefix(addr), net_pxlen(addr));
+    ASSERT_DIE(updated_node != NULL);
+  }
+  else
+  {
+    /* Announce or update -- @new must exist here, fail loudly if it doesn't */
+    ASSERT_DIE(new != NULL);
+
+    const struct net_addr *addr = new->rte.net->n.addr;
+
+    updated_node = aggregator_trie_insert_prefix(p, net_prefix(addr), net_pxlen(addr), new->bucket);
+
+    ASSERT_DIE(updated_node != NULL);
+    ASSERT_DIE(updated_node->px_origin == ORIGINAL);
+    ASSERT_DIE(updated_node->original_bucket != NULL);
+  }
+
+  struct trie_node *ancestor = updated_node;
+
+  /* Find the closest IN_FIB ancestor of the updated node */
+  // TODO: use node ancestor pointer instead of traversing
+  while ((ancestor = ancestor->parent))
+  {
+    ASSERT_DIE(ancestor != updated_node);
+
+    /* Stop when an IN_FIB ancestor is found or when we reached the root */
+    if (ancestor->status == IN_FIB || !ancestor->parent)
+      break;
+  }
+
+  /* NOTE(review): this relies on the root always being IN_FIB;
+   * the assert below enforces it */
+  ASSERT_DIE(ancestor != NULL);
+  ASSERT_DIE(ancestor != updated_node);
+  ASSERT_DIE(ancestor->status == IN_FIB);
+
+  /* Reaggregate trie with incorporated update */
+  aggregator_propagate_and_merge(ancestor);
+
+  /* Merge buckets upwards until they change, return last updated node */
+  struct trie_node *highest_node = aggregator_merge_buckets_above(ancestor);
+  ASSERT_DIE(highest_node != NULL);
+
+  aggregator_group_prefixes(p, highest_node);
+  check_trie_after_aggregation(highest_node);
+}