M4FLAGS=@M4FLAGS@
BISONFLAGS=@BISONFLAGS@
LIBS=@LIBS@
-DAEMON_LIBS=@DAEMON_LIBS@
+COMMON_LIBS=@COMMON_LIBS@
+DAEMON_LIBS=@DAEMON_LIBS@ $(COMMON_LIBS)
CLIENT_LIBS=@CLIENT_LIBS@
CC=@CC@
M4=@M4@
client=$(addprefix $(exedir)/,@CLIENT@)
daemon=$(exedir)/bird
+common-lib=$(objdir)/libbird.o.txt
protocols=@protocols@
PROTO_BUILD := $(protocols) dev kif krt
all-daemon = $(daemon): $(obj)
all-client = $(client): $(obj)
+all-lib = $(common-lib): $(obj)
s = $(dir $(lastword $(MAKEFILE_LIST)))
ifeq ($(srcdir),.)
$(Q)mkdir -p $(addprefix $(objdir)/,$(dirs) doc)
$(Q)touch $@
+# Composing the static library; older GCCs and linkers fail both at partial
+# linking with LTO and at static library creation, so we just collect the
+# list of dependency objects in a text file and expand it at final link time
+# (see the patsubst calls in the link recipes below).
+$(common-lib):
+ $(E)echo TXT $^ > $@
+ $(Q)echo $^ > $@
+# Some time in the future, somebody may want to try the following recipe again
+# $(E)echo LD $(LDFLAGS) -r -o $@ $^
+# +$(Q)$(CC) $(LDFLAGS) -r -o $@ $^
+
+# The lib must be linked last
+$(daemon): $(common-lib)
+
+#$(warning $(patsubst $(common-lib),$(shell cat $(common-lib)),whatever obj/libbird.o.txt))
+
$(client) $(daemon):
$(E)echo LD $(LDFLAGS) -o $@ $^ $(LIBS)
- $(Q)$(CC) $(LDFLAGS) -o $@ $^ $(LIBS)
+ +$(Q)$(CC) $(LDFLAGS) -o $@ $(patsubst $(common-lib),$(shell cat $(common-lib)),$^) $(LIBS)
$(objdir)/sysdep/paths.h: Makefile
$(E)echo GEN $@
tests_targets_ok = $(addsuffix .ok,$(tests_targets))
-$(tests_targets): %: %.o $(tests_objs) | prepare
+$(tests_targets): %: %.o $(tests_objs) $(common-lib) | prepare
$(E)echo LD $(LDFLAGS) -o $@ $< "..." $(LIBS)
- $(Q)$(CC) $(LDFLAGS) -o $@ $^ $(LIBS)
+ +$(Q)$(CC) $(LDFLAGS) -o $@ $(patsubst $(common-lib),$(shell cat $(common-lib)),$^) $(LIBS)
# Hack to avoid problems with tests linking everything
$(tests_targets): LIBS += $(DAEMON_LIBS)
clean::
rm -f $(objdir)/sysdep/paths.h $(objdir)/nest/proto-build.c
rm -f $(addprefix $(exedir)/,bird birdc birdcl)
+ rm -f $(common-lib)
find $(objdir) -name "*.[od]" -exec rm -f '{}' '+'
testsclean:
dnl ** (c) 1999 Martin Mares <mj@ucw.cz>
dnl ** (c) 2021 Maria Matejka <mq@jmq.cz>
+# Simplified version of the AX_COMPILER_VENDOR macro adapted from autotools
+AC_DEFUN([BIRD_COMPILER_VENDOR],
+[
+ AC_CACHE_CHECK(
+ [which compiler vendor we are dealing with],
+ [bird_cv_compiler_vendor],
+ AC_COMPILE_IFELSE([
+ AC_LANG_PROGRAM([ int x = __clang__; ], [])
+ ],
+ [bird_cv_compiler_vendor=llvm],
+ AC_COMPILE_IFELSE([
+ AC_LANG_PROGRAM([ int x = __GNUC__; ], [])
+ ],
+ [bird_cv_compiler_vendor=gnu],
+ [bird_cv_compiler_vendor=unknown]
+ ))
+ )
+])
+
AC_DEFUN([BIRD_CHECK_POINTER_ALIGNMENT],
[
AC_CACHE_CHECK(
[
bird_tmp_cflags="$CFLAGS"
bird_tmp_ldflags="$LDFLAGS"
- CFLAGS="-flto"
- LDFLAGS="-flto=4"
+ CFLAGS="$1"
+ LDFLAGS="$2"
AC_CACHE_CHECK(
[whether link time optimizer is available],
static timer *config_timer; /* Timer for scheduled configuration rollback */
/* These are public just for cmd_show_status(), should not be accessed elsewhere */
-int shutting_down; /* Shutdown requested, do not accept new config changes */
int configuring; /* Reconfiguration is running */
int undo_available; /* Undo was not requested from last reconfiguration */
/* Note that both shutting_down and undo_available are related to requests, not processing */
c->pool = p;
c->mem = l;
c->file_name = ndup;
- c->tf_route = c->tf_proto = TM_ISO_SHORT_MS;
- c->tf_base = c->tf_log = TM_ISO_LONG_MS;
- c->gr_wait = DEFAULT_GR_WAIT;
+ c->runtime.tf_route = c->runtime.tf_proto = TM_ISO_SHORT_MS;
+ c->runtime.tf_base = c->runtime.tf_log = TM_ISO_LONG_MS;
+ c->runtime.gr_wait = DEFAULT_GR_WAIT;
callback_init(&c->obstacles_cleared, config_obstacles_cleared, &main_birdloop);
obstacle_target_init(&c->obstacles, &c->obstacles_cleared, p, "Config");
}
}
-struct global_runtime global_runtime_internal[2] = {{
- .tf_log = {
- .fmt1 = "%F %T.%3f",
- },
-}};
-struct global_runtime * _Atomic global_runtime = &global_runtime_internal[0];
-
static void
global_commit(struct config *new, struct config *old)
{
/* Updating the global runtime. */
- struct global_runtime *og = atomic_load_explicit(&global_runtime, memory_order_relaxed);
- struct global_runtime *ng = &global_runtime_internal[og == &global_runtime_internal[0]];
- ASSERT_DIE(ng != og);
-
-#define COPY(x) ng->x = new->x;
- MACRO_FOREACH(COPY,
- tf_route,
- tf_proto,
- tf_log,
- tf_base,
- cli_debug,
- latency_debug,
- latency_limit,
- watchdog_warning,
- watchdog_timeout,
- gr_wait,
- hostname
- );
-#undef COPY
-
- ng->load_time = current_time();
-
- if (new->router_id)
- ng->router_id = new->router_id;
- else if (old)
+ union bird_global_runtime *ng = &new->runtime;
+ SKIP_BACK_DECLARE(union bird_global_runtime, og, generic,
+ atomic_load_explicit(&global_runtime, memory_order_relaxed));
+
+ if (!ng->router_id && old)
{
/* The startup router ID must be determined after start of device protocol,
* thus if old == NULL then we do nothing */
}
}
- atomic_store_explicit(&global_runtime, ng, memory_order_release);
-
- /* We have to wait until every reader surely doesn't read the old values */
- synchronize_rcu();
+ switch_runtime(&ng->generic);
}
static int
OBSREF_CLEAR(config);
OBSREF_SET(config, OBSREF_GET(*cr));
- if (!c->hostname)
+ if (!c->runtime.hostname)
{
- c->hostname = get_hostname(c->mem);
+ c->runtime.hostname = get_hostname(c->mem);
- if (!c->hostname)
+ if (!c->runtime.hostname)
log(L_WARN "Cannot determine hostname");
}
#include "lib/ip.h"
#include "lib/hash.h"
#include "lib/resource.h"
+#include "lib/runtime.h"
#include "lib/obstacle.h"
#include "lib/timer.h"
#include "lib/tlists.h"
struct symbol *def_tables[NET_MAX]; /* Default routing tables for each network */
struct iface_patt *router_id_from; /* Configured list of router ID iface patterns */
- u32 router_id; /* Our Router ID */
u32 proto_default_debug; /* Default protocol debug mask */
u32 proto_default_mrtdump; /* Default protocol mrtdump mask */
u32 channel_default_debug; /* Default channel debug mask */
u32 table_default_debug; /* Default table debug mask */
u32 show_route_debug; /* Exports to CLI debug mask */
u16 filter_vstk, filter_estk; /* Filter stack depth */
- struct timeformat tf_route; /* Time format for 'show route' */
- struct timeformat tf_proto; /* Time format for 'show protocol' */
- struct timeformat tf_log; /* Time format for the logfile */
- struct timeformat tf_base; /* Time format for other purposes */
- u32 gr_wait; /* Graceful restart wait timeout (sec) */
- const char *hostname; /* Hostname */
-
- int cli_debug; /* Tracing of CLI connections and commands */
- enum latency_debug_flags {
- DL_PING = 1,
- DL_WAKEUP = 2,
- DL_SCHEDULING = 4,
- DL_ALLOCATOR = 8,
- DL_SOCKETS = 0x10,
- DL_EVENTS = 0x20,
- DL_TIMERS = 0x40,
- } latency_debug; /* I/O loops log information about task scheduling */
- u32 latency_limit; /* Events with longer duration are logged (us) */
- u32 watchdog_warning; /* I/O loop watchdog limit for warning (us) */
- u32 watchdog_timeout; /* Watchdog timeout (in seconds, 0 = disabled) */
+
+ union bird_global_runtime {
+ struct global_runtime generic;
+ struct {
+ GLOBAL_RUNTIME_CONTENTS;
+
+ struct timeformat tf_route; /* Time format for 'show route' */
+ struct timeformat tf_proto; /* Time format for 'show protocol' */
+
+ u32 gr_wait; /* Graceful restart wait timeout (sec) */
+
+ u32 router_id; /* Our Router ID */
+
+ int cli_debug; /* Tracing of CLI connections and commands */
+ u32 watchdog_timeout; /* Watchdog timeout (in seconds, 0 = disabled) */
+ };
+ } runtime;
+
char *err_msg; /* Parser error message */
int err_lino; /* Line containing error */
int err_chno; /* Character where the parser stopped */
int thread_group_simple; /* Simple variant of thread configuration */
TLIST_LIST(thread_group_config) thread_group; /* Configured thread groups */
struct thread_group_config *default_thread_group;
+ int thread_count; /* How many worker threads to prefork */
+ struct thread_config threads; /* Thread settings */
struct sym_scope *root_scope; /* Scope for root symbols */
struct sym_scope *current_scope; /* Current scope where we are actually in while parsing */
int gr_down; /* This is a pseudo-config for graceful restart */
};
-struct global_runtime {
- struct timeformat tf_route; /* Time format for 'show route' */
- struct timeformat tf_proto; /* Time format for 'show protocol' */
- struct timeformat tf_log; /* Time format for the logfile */
- struct timeformat tf_base; /* Time format for other purposes */
-
- u32 gr_wait; /* Graceful restart wait timeout (sec) */
-
- u32 router_id; /* Our Router ID */
- const char *hostname; /* Hostname */
-
- btime load_time; /* When we reconfigured last time */
- int cli_debug; /* Tracing of CLI connections and commands */
- enum latency_debug_flags latency_debug;
- u32 latency_limit; /* Events with longer duration are logged (us) */
- u32 watchdog_warning; /* I/O loop watchdog limit for warning (us) */
- u32 watchdog_timeout; /* Watchdog timeout (in seconds, 0 = disabled) */
-
- struct thread_group *default_thread_group; /* Default thread group if not specified otherwise */
-};
-
extern struct global_runtime * _Atomic global_runtime;
+/* BIRD's global runtime accessor */
+#define BIRD_GLOBAL_RUNTIME SKIP_BACK(union bird_global_runtime, generic, atomic_load_explicit(&global_runtime, memory_order_relaxed))
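+/* e.g. u32 wait = BIRD_GLOBAL_RUNTIME->gr_wait;  (illustrative accessor usage) */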
+
/* Please don't use these variables in protocols. Use proto_config->global instead. */
typedef OBSREF(struct config) config_ref;
struct flow_builder *this_flow;
+/**
+ * flow_check_cf_value_length - check value by flowspec component type
+ * @fb: flow builder instance
+ * @val: value
+ *
+ * This function checks whether the value fits the range supported by the
+ * component's type. If a problem is found, it calls cf_error() with a
+ * textual description of why validation failed.
+ */
+static void
+flow_check_cf_value_length(struct flow_builder *fb, u32 val)
+{
+ enum flow_type t = fb->this_type;
+ u8 max = flow_max_value_length(t, fb->ipv6);
+
+ if (t == FLOW_TYPE_DSCP && val > 0x3f)
+ cf_error("%s value %u out of range (0-63)", flow_type_str(t, fb->ipv6), val);
+
+ if (max == 1 && (val > 0xff))
+ cf_error("%s value %u out of range (0-255)", flow_type_str(t, fb->ipv6), val);
+
+ if (max == 2 && (val > 0xffff))
+ cf_error("%s value %u out of range (0-65535)", flow_type_str(t, fb->ipv6), val);
+}
+
+/**
+ * flow_check_cf_bmk_values - check value/bitmask part of flowspec component
+ * @fb: flow builder instance
+ * @neg: negation operand
+ * @val: value from value/mask pair
+ * @mask: bitmap mask from value/mask pair
+ *
+ * This function checks a value/bitmask pair. If a problem is found, it calls
+ * cf_error() with a textual description of why validation failed.
+ */
+static void
+flow_check_cf_bmk_values(struct flow_builder *fb, u8 neg, u32 val, u32 mask)
+{
+ flow_check_cf_value_length(fb, val);
+ flow_check_cf_value_length(fb, mask);
+
+ if (neg && !(val == 0 || val == mask))
+ cf_error("For negation, value must be zero or bitmask");
+
+ if ((fb->this_type == FLOW_TYPE_TCP_FLAGS) && (mask & 0xf000))
+ cf_error("Invalid mask 0x%x, must not exceed 0xfff", mask);
+
+ if ((fb->this_type == FLOW_TYPE_FRAGMENT) && fb->ipv6 && (mask & 0x01))
+ cf_error("Invalid mask 0x%x, bit 0 must be 0", mask);
+
+ if (val & ~mask)
+ cf_error("Value 0x%x outside bitmask 0x%x", val, mask);
+}
+
+/**
+ * flow4_validate_cf - validate flowspec data structure &net_addr_flow4 at parse time
+ * @f: flowspec data structure &net_addr_flow4
+ *
+ * Check whether @f is a valid flowspec data structure. May call cf_error()
+ * with a textual description of why validation failed.
+ */
+static void
+flow4_validate_cf(net_addr_flow4 *f)
+{
+ enum flow_validated_state r = flow4_validate(flow4_first_part(f), flow_read_length(f->data));
+
+ if (r != FLOW_ST_VALID)
+ cf_error("Invalid flow route: %s", flow_validated_state_str(r));
+}
+
+/**
+ * flow6_validate_cf - validate flowspec data structure &net_addr_flow6 at parse time
+ * @f: flowspec data structure &net_addr_flow6
+ *
+ * Check whether @f is a valid flowspec data structure. May call cf_error()
+ * with a textual description of why validation failed.
+ */
+static void
+flow6_validate_cf(net_addr_flow6 *f)
+{
+ enum flow_validated_state r = flow6_validate(flow6_first_part(f), flow_read_length(f->data));
+
+ if (r != FLOW_ST_VALID)
+ cf_error("Invalid flow route: %s", flow_validated_state_str(r));
+}
+
CF_DECLS
[enable_debug_expensive=no]
)
+AC_ARG_ENABLE([debug-allocator],
+ [AS_HELP_STRING([--enable-debug-allocator], [enable internal memory allocator journal (implies --enable-debug) @<:@no@:>@])],
+ [],
+  [enable_debug_allocator=no]
+)
+
AC_ARG_ENABLE([memcheck],
[AS_HELP_STRING([--enable-memcheck], [check memory allocations when debugging @<:@yes@:>@])],
[],
AC_ARG_VAR([BISON], [location of the Bison program])
AC_ARG_VAR([M4], [location of the M4 program])
+if test "$enable_debug_allocator" = yes; then
+ enable_debug=yes
+fi
+
if test "$enable_debug_expensive" = yes; then
enable_debug=yes
fi
AC_MSG_ERROR([This program requires the GNU C Compiler.])
fi
+BIRD_COMPILER_VENDOR
+
BIRD_CHECK_THREAD_LOCAL
if test "$bird_cv_thread_local" = no ; then
AC_MSG_ERROR([This program requires thread local storage.])
if test "$bird_cv_lib_pthreads" = yes ; then
CFLAGS="$CFLAGS -pthread"
- LDFLAGS="$LDFLAGS -pthread"
+ COMMON_LIBS="$COMMON_LIBS -pthread"
else
AC_MSG_ERROR([POSIX threads not available.])
fi
BIRD_CHECK_GCC_OPTION([bird_cv_c_option_werror_implicit_function_declaration], [-Werror=implicit-function-declaration], [-Wall -Wextra])
if test "$enable_debug" = no; then
- BIRD_CHECK_LTO
+ LTO_CFLAGS=-flto
+ AS_CASE(${bird_cv_compiler_vendor},
+ gnu,LTO_LDFLAGS="-flto=jobserver",
+ llvm,LTO_LDFLAGS="-flto",
+ unknown,LTO_LDFLAGS="-flto",
+ AC_MSG_ERROR([Compiler vendor check failed for LTO: got ${bird_cv_compiler_vendor}]))
+ BIRD_CHECK_LTO(${LTO_CFLAGS}, ${LTO_LDFLAGS})
fi
if test "$bird_cv_c_lto" = yes; then
- CFLAGS="$CFLAGS -flto"
- LDFLAGS="$LDFLAGS -flto=4 -g"
+ CFLAGS="$CFLAGS $LTO_CFLAGS"
+ LDFLAGS="$LDFLAGS $LTO_LDFLAGS -g"
else
LDFLAGS="$LDFLAGS -g"
fi
AC_MSG_CHECKING([LDFLAGS])
AC_MSG_RESULT([$LDFLAGS])
+DAEMON_LIBS="${DAEMON_LIBS} ${LIBS}"
+AC_SUBST(DAEMON_LIBS)
+AC_SUBST(COMMON_LIBS)
+
AC_PROG_CPP
AC_PROG_INSTALL
-AC_PROG_RANLIB
+
AC_CHECK_PROG([FLEX], [flex], [flex])
AC_CHECK_PROG([BISON], [bison], [bison])
AC_CHECK_PROGS([M4], [gm4 m4])
AC_MSG_RESULT([$sysdep_dirs])
AC_SUBST([sysdep_dirs])
+AC_CHECK_FUNCS([pipe2])
+
if test "$with_iproutedir" = no ; then with_iproutedir= ; fi
if test -n "$given_iproutedir"
AC_SUBST([iproutedir])
-DAEMON_LIBS=
-AC_SUBST(DAEMON_LIBS)
-
if test "$enable_libssh" != no ; then
AC_CHECK_HEADER([libssh/libssh.h], [true], [fail=yes], [ ])
AC_CHECK_LIB([ssh], [ssh_connect], [true], [fail=yes])
if test "$fail" != yes ; then
AC_DEFINE([HAVE_LIBSSH], [1], [Define to 1 if you have the `ssh' library (-lssh).])
- DAEMON_LIBS="-lssh $DAEMON_LIBS"
+ COMMON_LIBS="-lssh $COMMON_LIBS"
enable_libssh=yes
else
if test "$enable_libssh" = yes ; then
if test "$enable_debug_expensive" = yes ; then
AC_DEFINE([ENABLE_EXPENSIVE_CHECKS], [1], [Define to 1 if you want to run expensive consistency checks.])
fi
+
+ if test "$enable_debug_allocator" = yes; then
+ AC_DEFINE([DEBUG_ALLOCATOR], [1], [Define to 1 if you want to store journals from memory allocations.])
+ fi
fi
if test "$enable_compact_tries" = yes ; then
the worker group thread count to that number, and the express group thread count
to one. This setting is deprecated and may disappear in some future version.
+<sect>Global performance options
+<label id="perf-opts">
+
+<p>The internal scheduler and allocator can be tweaked if needed. You probably
+don't need to do this, yet if you encounter some weird performance problem,
+these knobs may come in handy. For now, all the options are concentrated in the
+<cf/memory {}/ block; a short example follows the option list below.
+
+<descrip>
+ <tag><label id="memory-global-keep-hot">global keep hot <m/number/</tag>
+ How much memory is kept hot at most in the global memory storage.
+	Overflowing memory is returned to the OS. The virtual memory
+	(i.e. the address space) is kept in the cold storage and may be reused
+	later, to prevent address space fragmentation problems.
+ Aligned automatically to the system page size.
+ This knob must be higher than the following memory settings.
+ Default: 16777216.
+
+ <tag><label id="memory-local-keep-hot">local keep hot <m/number/</tag>
+ How much memory is kept hot at most in every thread-local memory storage.
+ Overflowing memory is moved to the global hot storage.
+ Aligned automatically to the system page size.
+ This knob must be higher than the following memory settings.
+ Default: 524288.
+
+	<tag><label id="memory-allocate-block">allocate block <m/number/</tag>
+	How much memory is allocated at once when no more memory is available
+	in either the hot or the cold storage.
+ Aligned automatically to the system page size.
+ Default: 131072.
+</descrip>
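+
+<p>For example, doubling all three limits relative to their defaults
+(illustrative values only; adjust to your workload):
+
+<code>
+memory {
+	global keep hot 33554432;
+	local keep hot 1048576;
+	allocate block 262144;
+}
+</code>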
+
<sect>Routing table options
<label id="rtable-opts">
-src := a-path.c a-set.c bitmap.c bitops.c blake2s.c blake2b.c checksum.c defer.c event.c flowspec.c idm.c ip.c lists.c lockfree.c mac.c md5.c mempool.c net.c netindex.c patmatch.c printf.c rcu.c resource.c sha1.c sha256.c sha512.c slab.c slists.c strtoul.c tbf.c timer.c xmalloc.c
+src := bitmap.c bitops.c blake2s.c blake2b.c cbor.c cbor-parser.c checksum.c defer.c event.c flowspec.c idm.c ip.c lists.c lockfree.c mac.c md5.c mempool.c net.c netindex.c patmatch.c printf.c rcu.c resource.c runtime.c sha1.c sha256.c sha512.c slab.c slists.c strtoul.c tbf.c timer.c xmalloc.c
+obj := $(src-o-files)
+$(all-lib)
+
+src := a-path.c a-set.c
obj := $(src-o-files)
$(all-daemon)
-tests_src := a-set_test.c a-path_test.c attribute_cleanup_test.c bitmap_test.c heap_test.c buffer_test.c event_test.c flowspec_test.c bitops_test.c patmatch_test.c fletcher16_test.c slist_test.c rt-normalize_test.c checksum_test.c lists_test.c locking_test.c mac_test.c ip_test.c hash_test.c printf_test.c rcu_test.c slab_test.c tlists_test.c type_test.c
+tests_src := a-set_test.c a-path_test.c attribute_cleanup_test.c bitmap_test.c cbor_test.c heap_test.c buffer_test.c event_test.c flowspec_test.c bitops_test.c patmatch_test.c fletcher16_test.c slist_test.c rt-normalize_test.c checksum_test.c lists_test.c locking_test.c mac_test.c ip_test.c hash_test.c printf_test.c rcu_test.c slab_test.c tlists_test.c type_test.c
tests_targets := $(tests_targets) $(tests-target-files)
tests_objs := $(tests_objs) $(src-o-files)
void bug(const char *msg, ...) NORET;
void vlog(int class, const char *msg, va_list args);
+void set_daemon_name(char *path, char *def);
+
+
#define L_DEBUG "\001" /* Debugging messages */
#define L_TRACE "\002" /* Protocol tracing */
#define L_INFO "\003" /* Informational messages */
u32 random_u32(void);
void random_init(void);
void random_bytes(void *buf, size_t size);
-
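+/* Convenience wrapper: e.g. u64 id = random_type(u64); fills a value of the given type with random bytes */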
+#define random_type(T) ({ T out; random_bytes(&out, sizeof out); out; })
/* Hashing */
--- /dev/null
+/*
+ * BIRD CBOR parser
+ *
+ * (c) 2024 Maria Matejka <mq@jmq.cz>
+ *
+ * Can be freely distributed and used under the terms of the GNU GPL.
+ */
+
+#include "lib/birdlib.h"
+#include "lib/cbor.h"
+#include "lib/hash.h"
+
+/*
+ * Basic parser bits
+ */
+
+static void
+cbor_parser_init(struct cbor_parser_context *ctx, linpool *lp, uint max_depth)
+{
+ ctx->lp = lp;
+ ctx->flush = lp_save(lp);
+
+ ctx->stack_countdown[0] = 1;
+ ctx->stack_pos = 0;
+ ctx->stack_max = max_depth;
+
+ ctx->target_buf = NULL;
+ ctx->target_len = 0;
+
+ ctx->type = 0xff;
+
+ ctx->partial_state = CPE_TYPE;
+ ctx->partial_countdown = 0;
+}
+
+struct cbor_parser_context *
+cbor_parser_new(pool *p, uint stack_max_depth)
+{
+ linpool *lp = lp_new(p);
+ struct cbor_parser_context *ctx = lp_allocz(
+ lp, sizeof *ctx + (stack_max_depth + 1) * sizeof ctx->stack_countdown[0]);
+
+ cbor_parser_init(ctx, lp, stack_max_depth);
+ return ctx;
+}
+
+void cbor_parser_reset(struct cbor_parser_context *ctx)
+{
+ lp_restore(ctx->lp, ctx->flush);
+ ctx->flush = lp_save(ctx->lp);
+
+ ctx->type = 0xff;
+ ctx->target_buf = NULL;
+ ctx->target_len = 0;
+ ctx->error = NULL;
+ ctx->partial_state = CPE_TYPE;
+ ctx->partial_countdown = 0;
+ ctx->stack_pos = 0;
+ ctx->stack_countdown[0] = 1;
+}
+
+#define CBOR_PARSER_ERROR(...) do { \
+ ctx->error = lp_sprintf(ctx->lp, __VA_ARGS__);\
+ return CPR_ERROR; \
+} while (0)
+
+enum cbor_parse_result
+cbor_parse_byte(struct cbor_parser_context *ctx, const byte bp)
+{
+ ctx->tflags = 0;
+
+ switch (ctx->partial_state)
+ {
+ case CPE_EXIT:
+ CBOR_PARSER_ERROR("Trailing byte %02x", bp);
+
+ case CPE_ITEM_DONE:
+ bug("You have to check cbor_parse_block_end() before running cbor_parse_byte()");
+
+ case CPE_TYPE:
+ /* Split the byte to type and value */
+ ctx->type = bp >> 5;
+ ctx->value = bp & 0x1f;
+
+ if (ctx->type == 7)
+ {
+ if (ctx->value < 20)
+ CBOR_PARSER_ERROR("Unknown simple value %u", ctx->value);
+ else if (ctx->value < 24)
+ ; /* false, true, null, undefined */
+ else if (ctx->value < 28)
+ {
+ /* Need more data */
+ ctx->partial_state = CPE_READ_INT;
+ ctx->partial_countdown = (1 << (ctx->value - 24));
+ ctx->value = 0;
+ break;
+ }
+ else if (ctx->value == 31)
+ ; /* break-stop */
+ else
+ CBOR_PARSER_ERROR("Unknown simple value %u", ctx->value);
+ }
+ else
+ {
+ if (ctx->value < 24)
+ ; /* Immediate value, fall through */
+ else if (ctx->value < 28)
+ {
+ /* Need more data */
+ ctx->partial_state = CPE_READ_INT;
+ ctx->partial_countdown = (1 << (ctx->value - 24));
+ ctx->value = 0;
+ break;
+ }
+ else if ((ctx->value == 31) && (ctx->type >= 2) && (ctx->type <= 5))
+ /* Indefinite length, fall through */
+ ctx->tflags |= CPT_VARLEN;
+ else
+ CBOR_PARSER_ERROR("Garbled additional value %u for type %u", ctx->value, ctx->type);
+ }
+ /* fall through */
+
+ case CPE_READ_INT:
+ if (ctx->partial_state == CPE_READ_INT)
+ {
+ /* Reading a network order integer */
+ ctx->value <<= 8;
+ ctx->value |= bp;
+ if (--ctx->partial_countdown)
+ break;
+ }
+ /* fall through */
+
+ case CPE_COMPLETE_INT:
+ /* Some types are completely parsed, some not yet */
+ switch (ctx->type)
+ {
+ case 0:
+ case 1:
+ case 7:
+ ctx->partial_state = CPE_ITEM_DONE;
+ break;
+
+ case 2:
+ case 3:
+ ctx->partial_state = CPE_READ_BYTE;
+ ctx->partial_countdown = ctx->value;
+ break;
+
+ case 4:
+ case 5:
+ if (++ctx->stack_pos >= ctx->stack_max)
+ CBOR_PARSER_ERROR("Stack too deep");
+
+ /* set array/map size;
+ * once for arrays, twice for maps;
+ * ~0 for indefinite, plus one for the array/map head itself */
+ ctx->stack_countdown[ctx->stack_pos] = (ctx->tflags & CPT_VARLEN) ? ~0ULL :
+ (ctx->value * (ctx->type - 3)) ;
+ ctx->partial_state = CPE_TYPE;
+ break;
+ }
+
+ /* Process the value */
+ return CPR_MAJOR;
+
+ case CPE_READ_BYTE:
+ *ctx->target_buf = bp;
+ ctx->target_buf++;
+ if (--ctx->target_len)
+ break;
+
+ ctx->target_buf = NULL;
+ ctx->partial_state = CPE_ITEM_DONE;
+ return CPR_STR_END;
+ }
+
+ return CPR_MORE;
+}
+
+bool
+cbor_parse_block_end(struct cbor_parser_context *ctx)
+{
+ if (ctx->partial_state != CPE_ITEM_DONE)
+ return false;
+
+ if (--ctx->stack_countdown[ctx->stack_pos])
+ {
+ ctx->partial_state = CPE_TYPE;
+ return false;
+ }
+
+ if (!ctx->stack_pos--)
+ ctx->partial_state = CPE_EXIT;
+
+ return true;
+}
+
+/*
+ * CBOR channel multiplexer
+ */
+
+#define CCH_EQ(a,b) (a)->id == (b)->id
+#define CCH_FN(x) (x)->idhash
+#define CCH_KEY(x) (x)
+#define CCH_NEXT(x) (x)->next_hash
+
+struct cbor_channel cbor_channel_parse_error;
+
+#define CSTR_PARSER_ERROR(...) do { \
+ log(L_ERR __VA_ARGS__); \
+ sk_close(s); \
+ return 0; \
+} while (0)
+
+#define CCH_CALL_PARSER(cch, kind) ( \
+ cch->parse ? cch->parse(cch, kind) : \
+ (ctx->stack_pos > 1) ? CPR_MORE : CPR_BLOCK_END \
+ )
+
+#define CCH_PARSE(kind) do { \
+ ASSERT_DIE(cch); \
+ switch (CCH_CALL_PARSER(cch, kind)) { \
+ case CPR_MORE: continue; \
+ case CPR_ERROR: sk_close(s); \
+ return 0; \
+ case CPR_BLOCK_END: stream->state = CSTR_FINISH; \
+ break; \
+ default: bug("Invalid return value from channel parser"); \
+ }} while(0)
+
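+/* Wire format: every message on the stream is a CBOR array whose first item
+ * is the channel ID (a positive integer); the remaining items are fed to the
+ * corresponding channel's parse hook. */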
+static int
+cbor_stream_rx(sock *s, uint sz)
+{
+ struct cbor_stream *stream = s->data;
+ struct cbor_parser_context *ctx = &stream->parser;
+ struct cbor_channel *cch = stream->cur_rx_channel;
+ u64 id;
+
+ for (uint pos = 0; pos < sz; pos++)
+ {
+ switch (cbor_parse_byte(ctx, s->rbuf[pos]))
+ {
+ case CPR_MORE:
+ continue;
+
+ case CPR_ERROR:
+ log(L_ERR "CBOR parser failure: %s", ctx->error);
+ sk_close(s);
+ return 0;
+
+ case CPR_MAJOR:
+ switch (stream->state)
+ {
+ case CSTR_INIT:
+ if (ctx->type != 4)
+ CSTR_PARSER_ERROR("Expected array, got %u", ctx->type);
+
+ if (ctx->value < 2)
+ CSTR_PARSER_ERROR("Expected array of length at least 2");
+
+ stream->state = CSTR_EXPECT_ID;
+ break;
+
+ case CSTR_EXPECT_ID:
+ CBOR_PARSE_ONLY(ctx, POSINT, id);
+ stream->state = CSTR_MSG;
+ stream->cur_rx_channel = cch = (
+ cbor_channel_find(stream, id) ?:
+ cbor_channel_create(stream, id)
+ );
+ break;
+
+ case CSTR_MSG:
+ CCH_PARSE(CPR_MAJOR);
+ break;
+
+ case CSTR_FINISH:
+ case CSTR_CLEANUP:
+ bug("Invalid stream pre-parser state");
+ }
+ break;
+
+ case CPR_STR_END:
+ ASSERT_DIE(stream->state == CSTR_MSG);
+ CCH_PARSE(CPR_STR_END);
+ break;
+
+ case CPR_BLOCK_END:
+ bug("Impossible value returned from cbor_parse_byte()");
+ }
+
+ while (cbor_parse_block_end(ctx))
+ {
+ switch (stream->state)
+ {
+ case CSTR_INIT:
+ case CSTR_EXPECT_ID:
+ case CSTR_CLEANUP:
+// CSTR_PARSER_ERROR("Invalid stream pre-parser state");
+ bug("Invalid stream pre-parser state");
+
+ case CSTR_MSG:
+ CCH_PARSE(CPR_BLOCK_END);
+ break;
+
+ case CSTR_FINISH:
+ stream->state = CSTR_CLEANUP;
+ break;
+ }
+ }
+
+ if (stream->state == CSTR_CLEANUP)
+ {
+ if (ctx->partial_state != CPE_EXIT)
+ CSTR_PARSER_ERROR("Garbled end of message");
+
+ stream->cur_rx_channel = NULL;
+
+ if (!cch->parse)
+ cbor_channel_done(cch);
+
+ ctx->partial_state = CPE_TYPE;
+ stream->state = CSTR_INIT;
+
+ if (pos + 1 < sz)
+ {
+ memmove(s->rbuf, s->rbuf + pos + 1, sz - pos - 1);
+ s->rpos = s->rbuf + sz - pos - 1;
+ }
+
+ return 0;
+ }
+ }
+
+ return 1;
+}
+
+static void
+cbor_stream_err(sock *sk, int err)
+{
+ struct cbor_stream *stream = sk->data;
+ if (err)
+ log(L_INFO "CBOR stream %p error: %d (%M)", sk, err, err);
+ else
+ log(L_INFO "CBOR stream %p hangup", sk);
+
+ stream->cur_rx_channel = NULL;
+
+ HASH_WALK_DELSAFE(stream->channels, next_hash, cch)
+ {
+ cbor_channel_done(cch);
+ }
+ HASH_WALK_DELSAFE_END;
+
+ stream->cancel(stream);
+
+ sk_close(sk);
+}
+
+void
+cbor_stream_init(struct cbor_stream *stream, pool *p, uint parser_depth, uint writer_depth, uint channel_size)
+{
+ stream->p = rp_newf(p, p->domain, "Stream pool");
+ HASH_INIT(stream->channels, stream->p, 4);
+ stream->slab = sl_new(stream->p, channel_size);
+
+ random_bytes(&stream->hmul, sizeof stream->hmul);
+ stream->writer_depth = writer_depth;
+ stream->state = CSTR_INIT;
+
+ cbor_parser_init(&stream->parser, lp_new(p), parser_depth);
+}
+
+void
+cbor_stream_attach(struct cbor_stream *stream, sock *sk)
+{
+ sk->data = stream;
+ sk->rx_hook = cbor_stream_rx;
+ sk->err_hook = cbor_stream_err;
+
+ stream->s = sk;
+ stream->loop = sk->loop;
+}
+
+struct cbor_channel *
+cbor_channel_create(struct cbor_stream *stream, u64 id)
+{
+ struct cbor_channel *cch = sl_allocz(stream->slab);
+ *cch = (struct cbor_channel) {
+ .id = id,
+ .idhash = id * stream->hmul,
+ .p = rp_newf(stream->p, stream->p->domain, "Channel 0x%lx", id),
+ .stream = stream,
+ .parse = stream->parse,
+ };
+
+ log(L_TRACE "CBOR channel create in stream %p, id %lx", stream, id);
+ HASH_INSERT(stream->channels, CCH, cch);
+ return cch;
+}
+
+struct cbor_channel *
+cbor_channel_find(struct cbor_stream *stream, u64 id)
+{
+ struct cbor_channel cchloc;
+ cchloc.id = id;
+ cchloc.idhash = cchloc.id * stream->hmul;
+
+ return HASH_FIND(stream->channels, CCH, &cchloc);
+}
+
+struct cbor_channel *
+cbor_channel_new(struct cbor_stream *stream)
+{
+ u64 id;
+ while (cbor_channel_find(stream, id = random_type(u64)))
+ ;
+
+ return cbor_channel_create(stream, id);
+}
+
+void
+cbor_channel_done(struct cbor_channel *channel)
+{
+ struct cbor_stream *stream = channel->stream;
+ bool active = (stream->cur_rx_channel == channel);
+
+ log(L_TRACE "CBOR channel%s done in stream %p, id %lx",
+ active ? " (active)" : "", stream, channel->id);
+
+ if (active)
+ {
+ channel->parse = NULL;
+ }
+ else
+ {
+ HASH_REMOVE(stream->channels, CCH, channel);
+ rp_free(channel->p);
+ sl_free(channel);
+ }
+}
--- /dev/null
+#include <stdint.h>
+#include <string.h>
+
+#include "lib/cbor.h"
+
+/* String versions of type constants */
+static const char *cbor_type_str_a[] = {
+ "POSINT",
+ "NEGINT",
+ "BYTES",
+ "TEXT",
+ "ARRAY",
+ "MAP",
+ "TAG",
+ "SPECIAL",
+};
+
+const char *
+cbor_type_str(enum cbor_basic_type t)
+{
+ return (t < ARRAY_SIZE(cbor_type_str_a)) ?
+ cbor_type_str_a[t] :
+ tmp_sprintf("(unknown: %u)", t);
+}
+
+/* Raw data writing */
+
+bool cbor_put_check(struct cbor_writer *w, u64 amount)
+{
+ return w->data.pos + amount <= w->data.end;
+}
+
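+/* Reserve the given number of bytes in the output buffer and return a pointer
+ * to them; makes the enclosing function return false on overflow. */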
+#define CBOR_PUT(amount) ({ \
+ byte *put = w->data.pos; \
+ if ((w->data.pos += (amount)) >= w->data.end) return false; \
+ put; })
+
+bool cbor_put_raw_u8(struct cbor_writer *w, byte b)
+{
+ *(CBOR_PUT(1)) = b;
+ return true;
+}
+
+bool cbor_put_raw_u16(struct cbor_writer *w, u16 val)
+{
+ put_u16(CBOR_PUT(2), val);
+ return true;
+}
+
+bool cbor_put_raw_u32(struct cbor_writer *w, u32 val)
+{
+ put_u32(CBOR_PUT(4), val);
+ return true;
+}
+
+bool cbor_put_raw_u64(struct cbor_writer *w, u64 val)
+{
+ put_u64(CBOR_PUT(8), val);
+ return true;
+}
+
+bool cbor_put_raw_data(struct cbor_writer *w, const byte *block, u64 size)
+{
+ memcpy(CBOR_PUT(size), block, size);
+ return true;
+}
+
+/* Basic value putting */
+bool cbor_put(struct cbor_writer *w, enum cbor_basic_type type, u64 value)
+{
+  ASSERT_DIE((type >= 0) && (type <= 7));
+ w->stack[w->stack_pos].items++;
+ byte tt = type << 5;
+ if (value < 0x18)
+ return
+ cbor_put_raw_u8(w, tt | value);
+ else if (value < 0x100)
+ return
+ cbor_put_raw_u8(w, tt | 0x18) &&
+ cbor_put_raw_u8(w, value);
+ else if (value < 0x10000)
+ return
+ cbor_put_raw_u8(w, tt | 0x19) &&
+ cbor_put_raw_u16(w, value);
+ else if (value < 0x100000000)
+ return
+ cbor_put_raw_u8(w, tt | 0x1a) &&
+ cbor_put_raw_u32(w, value);
+ else
+ return
+ cbor_put_raw_u8(w, tt | 0x1b) &&
+ cbor_put_raw_u64(w, value);
+}
+
+bool cbor_put_int(struct cbor_writer *w, int64_t value)
+{
+ if (value >= 0)
+ return cbor_put(w, CBOR_POSINT, value);
+ else
+ return cbor_put(w, CBOR_NEGINT, -1-value);
+}
+
+/* Strings */
+bool cbor_put_raw_bytes(struct cbor_writer *w, enum cbor_basic_type type, const byte *block, u64 size)
+{
+ return
+ cbor_put(w, type, size) &&
+ cbor_put_raw_data(w, block, size);
+}
+
+/* Arrays and maps */
+bool cbor_put_open(struct cbor_writer *w, enum cbor_basic_type type)
+{
+ if (++w->stack_pos >= w->stack_max)
+ return false;
+
+ w->stack[w->stack_pos].head = w->data.pos;
+ w->stack[w->stack_pos].items = 0;
+
+ return cbor_put(w, type, ~0ULL);
+}
+
+bool cbor_put_close(struct cbor_writer *w, u64 actual_size, bool strict)
+{
+ ASSERT_DIE(w->stack_pos > 0);
+
+ /* Pop the stack */
+ byte *head = w->stack[w->stack_pos].head;
+ u64 items = w->stack[w->stack_pos].items;
+
+ w->stack_pos--;
+
+ /* The open mark puts its item counter one level
+ * too deep; fixing this. */
+ items--;
+ w->stack[w->stack_pos].items++;
+
+ /* Check the original head position */
+ ASSERT_DIE((head[0] & 0x1f) == 0x1b);
+ ASSERT_DIE(w->data.pos >= w->data.start + 9);
+ switch (head[0] >> 5)
+ {
+ case CBOR_ARRAY:
+ if (strict && (items != actual_size))
+ bug("Inconsistent array item count");
+ break;
+
+ case CBOR_MAP:
+ if (strict && (items != actual_size * 2))
+ bug("Inconsistent map item count");
+ else if (items & 1)
+ bug("Trailing map key");
+ else
+ items /= 2;
+ break;
+
+ default:
+ bug("Head points to something other than array or map");
+ }
+
+  /* cbor_put_open() always reserves the longest (9-byte) head; now that the
+   * item count is known, shrink the head to its smallest encoding and move
+   * the following data back accordingly. */
+
+ if (items < 0x18)
+ {
+ memmove(head+1, head+9, w->data.pos - (head+9));
+    head[0] = (head[0] & 0xe0) | items;
+ w->data.pos -= 8;
+ }
+ else if (items < 0x100)
+ {
+ memmove(head+2, head+9, w->data.pos - (head+9));
+ head[0] &= 0xf8;
+ head[1] = items;
+ w->data.pos -= 7;
+ }
+ else if (items < 0x10000)
+ {
+ memmove(head+3, head+9, w->data.pos - (head+9));
+ head[0] &= 0xf9;
+ put_u16(head+1, items);
+ w->data.pos -= 6;
+ }
+ else if (items < 0x100000000)
+ {
+ memmove(head+5, head+9, w->data.pos - (head+9));
+ head[0] &= 0xfa;
+ put_u32(head+1, items);
+ w->data.pos -= 4;
+ }
+ else
+ {
+ head[0] &= 0xfb;
+ put_u64(head+1, items);
+ }
+
+ return true;
+}
+
+/* Tags: TODO! */
+
+
+/* Writer contexts */
+struct cbor_writer *
+cbor_reply_init(struct cbor_channel *cch)
+{
+ ASSERT_DIE(cch->stream->s->tbsize > 16);
+ ASSERT_DIE(cch->stream->s->tbuf);
+ struct cbor_writer *cw = &cch->writer;
+ if (cch->stream->s->tbuf != cch->stream->s->tpos)
+ bug("Not implemented reply to not-fully-flushed buffer");
+
+ cbor_writer_init(cw, cch->stream->writer_depth, cch->stream->s->tbuf, cch->stream->s->tbsize);
+
+ ASSERT_DIE(cbor_open_array(cw));
+ ASSERT_DIE(cbor_put_posint(cw, cch->id));
+ return cw;
+}
+
+void
+cbor_reply_send(struct cbor_channel *cch, struct cbor_writer *cw)
+{
+ ASSERT_DIE(cw == &cch->writer);
+ ASSERT_DIE(cbor_close_array(cw));
+ sk_send(cch->stream->s, cw->data.pos - cw->data.start);
+}
+
+#if 0
+
+void cbor_epoch_time(struct cbor_writer *writer, int64_t time, int shift)
+{
+ write_item(writer, 6, 1); // 6 is TAG, 1 is tag number for epoch time
+ cbor_relativ_time(writer, time, shift);
+}
+
+void cbor_relativ_time(struct cbor_writer *writer, int64_t time, int shift)
+{
+ write_item(writer, 6, 4); // 6 is TAG, 4 is tag number for decimal fraction
+ cbor_open_list_with_length(writer, 2);
+ cbor_add_int(writer, shift);
+ cbor_add_int(writer, time);
+}
+
+void cbor_add_ipv4(struct cbor_writer *writer, ip4_addr addr)
+{
+ write_item(writer, 6, 52); // 6 is TAG, 52 is tag number for ipv4
+ write_item(writer, 2, 4); // bytestring of length 4
+ put_ip4(&writer->cbor[writer->pt], addr);
+ writer->pt += 4;
+}
+
+void cbor_add_ipv6(struct cbor_writer *writer, ip6_addr addr)
+{
+ write_item(writer, 6, 54); // 6 is TAG, 54 is tag number for ipv6
+ write_item(writer, 2, 16); // bytestring of length 16
+ put_ip6(&writer->cbor[writer->pt], addr);
+ writer->pt += 16;
+}
+
+
+void cbor_add_ipv4_prefix(struct cbor_writer *writer, net_addr_ip4 *n)
+{
+ write_item(writer, 6, 52); // 6 is TAG, 52 is tag number for ipv4
+ cbor_open_block_with_length(writer, 2);
+ cbor_add_int(writer, n->pxlen);
+ write_item(writer, 2, 4); // bytestring of length 4
+ put_ip4(&writer->cbor[writer->pt], n->prefix);
+ writer->pt += 4;
+}
+
+
+void cbor_add_ipv6_prefix(struct cbor_writer *writer, net_addr_ip6 *n)
+{
+ write_item(writer, 6, 54); // 6 is TAG, 54 is tag number for ipv6
+ cbor_open_block_with_length(writer, 2);
+ cbor_add_int(writer, n->pxlen);
+
+ write_item(writer, 2, 16);
+ put_ip6(&writer->cbor[writer->pt], n->prefix);
+ writer->pt += 16;
+}
+
+
+void cbor_add_uint(struct cbor_writer *writer, uint64_t item)
+{
+ write_item(writer, 0, item);
+}
+
+void cbor_add_tag(struct cbor_writer *writer, int item)
+{
+ write_item(writer, 6, item);
+}
+
+void cbor_add_string(struct cbor_writer *writer, const char *string)
+{
+ int length = strlen(string);
+ write_item(writer, 3, length); // 3 is major, then goes length of string and string
+ check_memory(writer, length);
+ memcpy(writer->cbor+writer->pt, string, length);
+ writer->pt+=length;
+}
+
+void cbor_nonterminated_string(struct cbor_writer *writer, const char *string, uint32_t length)
+{
+ write_item(writer, 3, length); // 3 is major, then goes length of string and string
+ check_memory(writer, length);
+ memcpy(writer->cbor+writer->pt, string, length);
+ writer->pt+=length;
+}
+
+void write_item(struct cbor_writer *writer, uint8_t major, uint64_t num)
+{
+ //log("write major %i %li", major, num);
+ major = major<<5;
+ check_memory(writer, 10);
+ if (num > ((uint64_t)1<<(4*8))-1)
+ { // We need 8 bytes to encode the num
+ major += 0x1b; // reserving those bytes
+ writer->cbor[writer->pt] = major;
+ writer->pt++;
+ for (int i = 7; i>=0; i--)
+ { // write n-th byte of num
+ uint8_t to_write = (num>>(i*8)) & 0xff;
+ writer->cbor[writer->pt] = to_write;
+ writer->pt++;
+ }
+ return;
+ }
+ if (num > (1<<(2*8))-1)
+ { // We need 4 bytes to encode the num
+ major += 0x1a; // reserving those bytes
+ writer->cbor[writer->pt] = major;
+ writer->pt++;
+ for (int i = 3; i>=0; i--)
+ { // write n-th byte of num
+ uint8_t to_write = (num>>(i*8)) & 0xff;
+ writer->cbor[writer->pt] = to_write;
+ writer->pt++;
+ }
+ return;
+ }
+ if (num > (1<<(8))-1)
+ { // We need 2 bytes to encode the num
+ major += 0x19; // reserving those bytes
+ writer->cbor[writer->pt] = major;
+ writer->pt++;
+ for (int i = 1; i>=0; i--)
+ { // write n-th byte of num
+ uint8_t to_write = (num>>(i*8)) & 0xff;
+ writer->cbor[writer->pt] = to_write;
+ writer->pt++;
+ }
+ return;
+ }
+ if (num > 23)
+  { // byte is enough, but additional value would be too big
+ major += 0x18; // reserving that byte
+ writer->cbor[writer->pt] = major;
+ writer->pt++;
+ uint8_t to_write = num & 0xff;
+ writer->cbor[writer->pt] = to_write;
+ writer->pt++;
+ return;
+ }
+ //log("write item major %i num %i writer->pt %i writer->capacity %i writer %i", major, num, writer->pt, writer->capacity, writer);
+ major += num; // we can store the num as additional value
+ writer->cbor[writer->pt] = major;
+ writer->pt++;
+}
+
+void cbor_write_item_with_constant_val_length_4(struct cbor_writer *writer, uint8_t major, uint64_t num)
+{
+// this is only for headers which should be constantly long.
+ major = major<<5;
+ check_memory(writer, 10);
+ major += 0x1a; // reserving those bytes
+ writer->cbor[writer->pt] = major;
+ writer->pt++;
+ for (int i = 3; i>=0; i--)
+ { // write n-th byte of num
+ uint8_t to_write = (num>>(i*8)) & 0xff;
+ writer->cbor[writer->pt] = to_write;
+ writer->pt++;
+ }
+}
+
+
+void rewrite_4bytes_int(struct cbor_writer *writer, int pt, int num)
+{
+ for (int i = 3; i>=0; i--)
+ {
+ uint8_t to_write = (num>>(i*8)) & 0xff;
+ writer->cbor[pt] = to_write;
+ pt++;
+ }
+}
+
+void check_memory(struct cbor_writer *writer, int add_size)
+{
+ if (writer->capacity - writer->pt-add_size < 0)
+ {
+ bug("There is not enough space for cbor response in given buffer");
+ }
+}
+#endif
--- /dev/null
+#ifndef CBOR_H
+#define CBOR_H
+
+#include "nest/bird.h"
+#include "lib/hash.h"
+#include "lib/socket.h"
+
+/**
+ * CBOR Commonalities
+ **/
+
+enum cbor_basic_type {
+ CBOR_POSINT = 0,
+ CBOR_NEGINT = 1,
+ CBOR_BYTES = 2,
+ CBOR_TEXT = 3,
+ CBOR_ARRAY = 4,
+ CBOR_MAP = 5,
+ CBOR_TAG = 6,
+ CBOR_SPECIAL = 7,
+};
+
+const char *cbor_type_str(enum cbor_basic_type);
+
+/**
+ * CBOR Writer
+ **/
+
+struct cbor_writer {
+ buffer data;
+ uint stack_pos, stack_max; /* Nesting of CBOR_ARRAY / CBOR_MAP */
+ struct cbor_writer_stack_item {
+ u64 items;
+ byte *head;
+ } stack[0];
+};
+
+/* Initialization */
+static inline struct cbor_writer *cbor_writer_init(struct cbor_writer *w, uint stack_max_depth, byte *buf, uint size)
+{
+ *w = (struct cbor_writer) {
+ .data = {
+ .start = buf,
+ .pos = buf,
+ .end = buf + size,
+ },
+ .stack_max = stack_max_depth,
+ };
+ return w;
+}
+
+#define cbor_writer_new(p, smax, buf, size) cbor_writer_init(mb_alloc((p), sizeof(struct cbor_writer) + (smax) * sizeof(struct cbor_writer_stack_item)), (smax), (buf), (size))
+
+
+/* Return how many items have been encoded */
+static inline int cbor_writer_done(struct cbor_writer *w)
+{
+ if (w->stack_pos > 0)
+ return -1;
+ else
+ return w->stack[0].items;
+}
+
+/* Integer types */
+bool cbor_put(struct cbor_writer *w, enum cbor_basic_type type, u64 value);
+#define cbor_put_posint(w,v) cbor_put((w), CBOR_POSINT, (v))
+#define cbor_put_negint(w,v) cbor_put((w), CBOR_NEGINT, -1-(v))
+bool cbor_put_int(struct cbor_writer *w, int64_t value);
+
+/* String types */
+bool cbor_put_raw_bytes(struct cbor_writer *w, enum cbor_basic_type type, const byte *block, u64 size);
+#define cbor_put_bytes(w, b, s) cbor_put_raw_bytes((w), CBOR_BYTES, (b), (s))
+#define cbor_put_text(w, b, s) cbor_put_raw_bytes((w), CBOR_TEXT, (b), (s))
+#define cbor_put_string(w, s) cbor_put_raw_bytes((w), CBOR_TEXT, (s), strlen(s))
+#define cbor_put_toks(w, s) cbor_put_raw_bytes((w), CBOR_TEXT, #s, sizeof #s)
+
+/* Compound types */
+bool cbor_put_open(struct cbor_writer *w, enum cbor_basic_type type);
+bool cbor_put_close(struct cbor_writer *w, u64 actual_size, bool strict);
+#define cbor_open_array(w) cbor_put_open((w), CBOR_ARRAY)
+#define cbor_open_map(w) cbor_put_open((w), CBOR_MAP)
+
+#define cbor_close_array(w) cbor_put_close((w), 0, 0)
+#define cbor_close_map(w) cbor_put_close((w), 0, 0)
+
+#define CBOR_PUT_ARRAY(w) for (struct cbor_writer *_w = w, *_ww = cbor_open_array(_w) ? (_w) : (bug("buffer overflow on CBOR_ARRAY"), NULL); (_w = NULL), _ww; cbor_close_array(_ww), _ww = NULL)
+
+#define CBOR_PUT_MAP(w) for (struct cbor_writer *_w = w, *_ww = cbor_open_map(_w) ? (_w) : (bug("buffer overflow on CBOR_MAP"), NULL); (_w = NULL), _ww; cbor_close_map(_ww), _ww = NULL)
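+
+/* Usage sketch (illustrative); the original writer pointer stays valid inside
+ * the block, and keys/values are put in pairs:
+ *
+ *   CBOR_PUT_MAP(cw)
+ *   {
+ *     cbor_put_string(cw, "key");
+ *     cbor_put_posint(cw, 42);
+ *   }
+ */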
+
+/* Specials */
+#define cbor_put_false(w) cbor_put((w), CBOR_SPECIAL, 20);
+#define cbor_put_true(w) cbor_put((w), CBOR_SPECIAL, 21);
+#define cbor_put_null(w) cbor_put((w), CBOR_SPECIAL, 22);
+#define cbor_put_undef(w) cbor_put((w), CBOR_SPECIAL, 23);
+
+#if 0
+void cbor_add_int(struct cbor_writer *writer, int64_t item);
+
+void cbor_add_ipv4(struct cbor_writer *writer, ip4_addr);
+
+void cbor_add_ipv6(struct cbor_writer *writer, ip6_addr);
+
+void cbor_epoch_time(struct cbor_writer *writer, int64_t time, int shift);
+
+void cbor_relativ_time(struct cbor_writer *writer, int64_t time, int shift);
+
+void cbor_add_ipv4_prefix(struct cbor_writer *writer, net_addr_ip4 *n);
+
+
+void cbor_add_ipv6_prefix(struct cbor_writer *writer, net_addr_ip6 *n);
+
+
+void cbor_add_uint(struct cbor_writer *writer, uint64_t item);
+
+void cbor_add_tag(struct cbor_writer *writer, int item);
+
+void cbor_add_string(struct cbor_writer *writer, const char *string);
+
+void cbor_nonterminated_string(struct cbor_writer *writer, const char *string, uint32_t length);
+
+void write_item(struct cbor_writer *writer, uint8_t major, uint64_t num);
+
+void cbor_write_item_with_constant_val_length_4(struct cbor_writer *writer, uint8_t major, uint64_t num);
+
+void rewrite_4bytes_int(struct cbor_writer *writer, int pt, int num);
+#endif
+
+/*
+ * Parser bits
+ */
+
+struct cbor_parser_context {
+ /* Public part */
+ linpool *lp; /* Linpool for in-parser allocations */
+
+ byte type; /* Last parsed type */
+ enum {
+ CPT_VARLEN = 1,
+ } tflags; /* Additional flags for the type / value pair */
+ u64 value; /* Last parsed (integer) value */
+
+ byte *target_buf; /* Target buf for CBOR_BYTES or CBOR_TEXT */
+ uint target_len; /* Set how many bytes to store */
+
+ const char *error; /* Error message */
+
+ /* Private part */
+ lp_state *flush; /* Linpool reset pointer */
+
+ enum { /* Multi-byte reader */
+ CPE_TYPE = 0,
+ CPE_READ_INT,
+ CPE_COMPLETE_INT,
+ CPE_READ_BYTE,
+ CPE_ITEM_DONE,
+ CPE_EXIT,
+ } partial_state;
+
+ u64 partial_countdown; /* How many items remaining in CBOR_ARRAY / CBOR_MAP */
+
+ uint stack_pos, stack_max; /* Nesting of CBOR_ARRAY / CBOR_MAP */
+ u64 stack_countdown[0];
+};
+
+struct cbor_parser_context *cbor_parser_new(pool *, uint stack_max_depth);
+static inline void cbor_parser_free(struct cbor_parser_context *ctx)
+{ rfree(ctx->lp); }
+void cbor_parser_reset(struct cbor_parser_context *ctx);
+
+enum cbor_parse_result {
+ CPR_ERROR = 0,
+ CPR_MORE,
+ CPR_MAJOR,
+ CPR_STR_END,
+ CPR_BLOCK_END,
+} cbor_parse_byte(struct cbor_parser_context *, const byte);
+bool cbor_parse_block_end(struct cbor_parser_context *);
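+
+/* Typical driving loop (sketch): feed input one byte at a time to
+ * cbor_parse_byte(); on CPR_MAJOR or CPR_STR_END consume ctx->type and
+ * ctx->value (or the filled target buffer), then call cbor_parse_block_end()
+ * repeatedly to pop any arrays/maps that have just finished. */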
+
+#define CBOR_PARSE_IF(_ctx, _type, _target) if (((_ctx)->type == CBOR_##_type) && CBOR_STORE_##_type((_ctx), _target))
+#define CBOR_PARSE_ONLY(_ctx, _type, _target) CBOR_PARSE_IF(_ctx, _type, _target) {} else CBOR_PARSER_ERROR("Expected %s for %s, got %s", #_type, #_target, cbor_type_str((_ctx)->type))
+
+#define CBOR_STORE_POSINT(_ctx, _target) ((_target = (_ctx)->value), 1)
+#define CBOR_STORE_NEGINT(_ctx, _target) ((_target = -1LL-(_ctx)->value), 1)
+#define CBOR_STORE_BYTES(_ctx, _target) ({ \
+ if ((_ctx)->tflags & CPT_VARLEN) CBOR_PARSER_ERROR("Variable length string not supported yet"); \
+ if ((_target)) CBOR_PARSER_ERROR("Duplicate argument %s", #_target); \
+ ASSERT_DIE(!(_ctx)->target_buf); \
+ _target = (_ctx)->target_buf = lp_alloc((_ctx)->lp, ((_ctx)->target_len = (_ctx)->value) + 1); \
+ 1; })
+#define CBOR_STORE_TEXT CBOR_STORE_BYTES
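+
+/* Example (illustrative): in a parse hook,
+ *
+ *   u64 id = 0;
+ *   CBOR_PARSE_ONLY(ctx, POSINT, id);
+ *
+ * stores the last parsed positive integer into `id', or expands to
+ * CBOR_PARSER_ERROR() (which the including file must define) on any other type. */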
+
+
+/*
+ * Message channels
+ */
+
+struct cbor_channel;
+typedef enum cbor_parse_result (*cbor_stream_parse_fn)(struct cbor_channel *, enum cbor_parse_result);
+
+struct cbor_stream {
+ HASH(struct cbor_channel) channels;
+ pool *p;
+ struct birdloop *loop;
+ slab *slab;
+ sock *s;
+ cbor_stream_parse_fn parse;
+ void (*cancel)(struct cbor_stream *);
+ struct cbor_channel *cur_rx_channel;
+ u64 hmul;
+ enum {
+ CSTR_INIT,
+ CSTR_EXPECT_ID,
+ CSTR_MSG,
+ CSTR_FINISH,
+ CSTR_CLEANUP,
+ } state;
+ uint writer_depth;
+ struct cbor_parser_context parser;
+};
+
+#define CBOR_STREAM_EMBED(name, N) struct { \
+ struct cbor_stream name; \
+ u64 _##name##_stack_countdown[N]; \
+}
+
+#define CBOR_STREAM_INIT(up, name, chname, p, T) \
+ cbor_stream_init(&(up)->name, p, \
+ ARRAY_SIZE((up)->_##name##_stack_countdown), \
+ ARRAY_SIZE(((T *) NULL)->_##chname##_writer_stack), \
+ sizeof(T))
+
+/* Init and cleanup of CBOR stream */
+void cbor_stream_init(struct cbor_stream *stream, pool *p, uint parser_depth, uint writer_depth, uint channel_size);
+void cbor_stream_attach(struct cbor_stream *, sock *);
+void cbor_stream_cleanup(struct cbor_stream *);
+
+struct cbor_channel {
+ struct cbor_channel *next_hash;
+ struct cbor_stream *stream;
+ cbor_stream_parse_fn parse;
+ void (*cancel)(struct cbor_channel *);
+ pool *p;
+ u64 id;
+ u64 idhash;
+ struct cbor_writer writer;
+};
+
+#define CBOR_CHANNEL_EMBED(name, N) struct { \
+ struct cbor_channel name; \
+ struct cbor_writer_stack_item _##name##_writer_stack[N]; \
+}
+
+extern struct cbor_channel cbor_channel_parse_error;
+
+/* Locally define a new channel */
+struct cbor_channel *cbor_channel_new(struct cbor_stream *);
+
+/* Create a channel with a pre-determined ID.
+ * You have to check nonexistence manually. */
+struct cbor_channel *cbor_channel_create(struct cbor_stream *stream, u64 id);
+/* Find an existing channel */
+struct cbor_channel *cbor_channel_find(struct cbor_stream *, u64 id);
+
+/* Drop the channel */
+void cbor_channel_done(struct cbor_channel *);
+
+struct cbor_writer *cbor_reply_init(struct cbor_channel *);
+void cbor_reply_send(struct cbor_channel *, struct cbor_writer *);
+#define CBOR_REPLY(ch, cw) for (struct cbor_writer *cw = cbor_reply_init(ch); cw; cbor_reply_send(ch, cw), cw = NULL)
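+
+/* Rough lifecycle sketch (illustrative; `my_channel' and the depths are
+ * placeholders):
+ *
+ *   CBOR_STREAM_EMBED(cs, 16) s;
+ *   cbor_stream_init(&s.cs, p, 16, 4, sizeof(struct my_channel));
+ *   cbor_stream_attach(&s.cs, sk);
+ *
+ * and inside a channel's parse hook, answer on the same channel:
+ *
+ *   CBOR_REPLY(ch, cw)
+ *     cbor_put_string(cw, "done");
+ */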
+
+
+#endif
--- /dev/null
+#include <string.h>
+#include "lib/cbor_parse_tools.h"
+
+uint
+compare_buff_str(struct buff_reader *buf_read, uint length, char *string)
+{
+  if (length != strlen(string))
+    return 0;
+
+  for (size_t i = 0; i < strlen(string); i++)
+    if (buf_read->buff[i + buf_read->pt] != string[i])
+      return 0;
+
+  return 1;
+}
+
+struct value
+get_value(struct buff_reader *reader)
+{
+ struct value val;
+ byte *buff = reader->buff;
+ val.major = buff[reader->pt]>>5;
+ int first_byte_val = buff[reader->pt] - (val.major<<5);
+ if (first_byte_val <=23) {
+ val.val = first_byte_val;
+ reader->pt++;
+ } else if (first_byte_val == 0x18)
+ {
+ val.val = buff[reader->pt+1];
+ reader->pt+=2;
+ } else if (first_byte_val == 0x19)
+ {
+ val.val = buff[reader->pt+1];
+ val.val = val.val << 8;
+ val.val += buff[reader->pt+2];
+ reader->pt += 3;
+ } else if (first_byte_val == 0x1a)
+ {
+ val.val = 0;
+ for (int i = 1; i < 4; i++)
+ {
+ val.val += buff[reader->pt+i];
+ val.val = val.val << 8;
+ }
+ val.val += buff[reader->pt+4];
+ reader->pt+=5;
+ } else if (first_byte_val == 0x1b)
+ {
+ val.val = 0;
+ for (int i = 1; i < 8; i++) {
+ val.val += buff[reader->pt+i];
+ val.val = val.val << 8;
+ }
+ val.val += buff[reader->pt+8];
+ reader->pt += 9;
+ } else if (first_byte_val == 0x1f)
+ {
+ val.val = -1;
+ reader->pt++;
+ }
+ if (val.major == NEG_INT)
+ val.val = -1 - val.val;
+ return val;
+}
+
+
+int val_is_break(struct value val)
+{
+  return val.major == FLOAT && val.val == -1; // break code is 0xff, so the major is the same for float and break
+}
--- /dev/null
+#include "sysdep/config.h"
+#include "lib/birdlib.h"
+
+enum functions {
+ SHOW_STATUS = 0,
+ SHOW_MEMORY = 1,
+ SHOW_SYMBOLS = 2,
+ SHOW_OSPF = 3,
+ SHOW_PROTOCOLS = 4,
+};
+
+enum cbor_majors {
+ UINT = 0,
+ NEG_INT = 1,
+ BYTE_STR = 2,
+ TEXT = 3,
+ ARRAY = 4,
+ BLOCK = 5,
+ TAG = 6,
+ FLOAT = 7,
+};
+
+
+struct value {
+ int major;
+ int64_t val;
+};
+
+struct buff_reader {
+ byte *buff;
+ uint pt;
+ uint size;
+};
+
+
+uint compare_buff_str(struct buff_reader *buf_read, uint length, char *string);
+
+struct value
+get_value(struct buff_reader *reader);
+
+
+int val_is_break(struct value val);
--- /dev/null
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "lib/cbor_shortcuts.h"
+
+
+
+void cbor_string_string(struct cbor_writer *writer, char *key, const char *value) {
+ cbor_add_string(writer, key);
+ cbor_add_string(writer, value);
+}
+
+void cbor_string_int(struct cbor_writer *writer, char *key, int64_t value) {
+ cbor_add_string(writer, key);
+ cbor_add_int(writer, value);
+}
+
+void cbor_string_uint(struct cbor_writer *writer, char *key, u64 value) {
+ cbor_add_string(writer, key);
+ cbor_add_uint(writer, value);
+}
+
+void cbor_string_epoch_time(struct cbor_writer *writer, char *key, int64_t time, int shift) {
+ cbor_add_string(writer, key);
+ cbor_epoch_time(writer, time, shift);
+}
+
+void cbor_string_relativ_time(struct cbor_writer *writer, char *key, int64_t time, int shift) {
+ cbor_add_string(writer, key);
+ cbor_relativ_time(writer, time, shift);
+}
+
+void cbor_string_ip(struct cbor_writer *writer, char *key, ip_addr addr) {
+ cbor_add_string(writer, key);
+ if (ipa_is_ip4(addr))
+ cbor_add_ipv4(writer, ipa_to_ip4(addr));
+ else
+ cbor_add_ipv6(writer, ipa_to_ip6(addr));
+}
+
+void cbor_string_ipv4(struct cbor_writer *writer, char *key, ip4_addr addr) {
+ cbor_add_string(writer, key);
+ cbor_add_ipv4(writer, addr);
+}
+
+void cbor_string_ipv6(struct cbor_writer *writer, char *key, ip6_addr addr) {
+ cbor_add_string(writer, key);
+ cbor_add_ipv6(writer, addr);
+}
+
+void cbor_named_block_two_ints(struct cbor_writer *writer, char *key, char *name1, int val1, char *name2, int val2) {
+ cbor_add_string(writer, key);
+ cbor_open_block_with_length(writer, 2);
+ cbor_add_string(writer, name1);
+ cbor_add_int(writer, val1);
+ cbor_add_string(writer, name2);
+ cbor_add_int(writer, val2);
+}
+
+void cbor_write_to_file(struct cbor_writer *writer, char *filename) {
+ FILE *write_ptr;
+
+ write_ptr = fopen(filename, "wb");
+
+ fwrite(writer->cbor, writer->pt, 1, write_ptr);
+ fclose(write_ptr);
+}
+
+void cbor_add_net(struct cbor_writer *writer, const net_addr *N) {
+ // Original switch comes from lib/net.c and contains more cases.
+ net_addr_union *n = (void *) N;
+
+ switch (n->n.type)
+ {
+ case NET_IP4:
+ cbor_add_ipv4_prefix(writer, &n->ip4);
+ return;
+ case NET_IP6:
+ cbor_add_ipv6_prefix(writer, &n->ip6);
+ return;
+ default:
+ bug("net type unsupported by cbor (yet).");
+ }
+}
+
+
+
--- /dev/null
+#ifndef CBOR_SHORTCUTS_H
+#define CBOR_SHORTCUTS_H
+
+#include "lib/cbor.h"
+#include "sysdep/config.h"
+#include "lib/birdlib.h"
+#include "nest/protocol.h"
+#include "lib/ip.h"
+
+
+void cbor_string_string(struct cbor_writer *writer, char *key, const char *value);
+
+void cbor_string_int(struct cbor_writer *writer, char *key, int64_t value);
+
+void cbor_string_epoch_time(struct cbor_writer *writer, char *key, int64_t time, int shift);
+void cbor_string_relativ_time(struct cbor_writer *writer, char *key, int64_t time, int shift);
+void cbor_string_uint(struct cbor_writer *writer, char *key, u64 value);
+void cbor_string_ip(struct cbor_writer *writer, char *key, ip_addr);
+void cbor_string_ipv4(struct cbor_writer *writer, char *key, ip4_addr);
+void cbor_string_ipv6(struct cbor_writer *writer, char *key, ip6_addr);
+void cbor_named_block_two_ints(struct cbor_writer *writer, char *key, char *name1, int val1, char *name2, int val2);
+void cbor_write_to_file(struct cbor_writer *writer, char *filename);
+
+void cbor_add_net(struct cbor_writer *writer, const net_addr *N);
+
+#endif
--- /dev/null
+
+#include "test/birdtest.h"
+#include "lib/cbor.h"
+#include "lib/cbor_parse_tools.h"
+
+#define BUFF_LEN 100
+
+struct cbor_writer *w;
+struct buff_reader reader;
+
+void print_to_file_for_control_from_outside(void)
+{
+ FILE *write_ptr;
+
+ write_ptr = fopen("a.cbor", "wb");
+
+ fwrite(w->cbor, w->pt, 1, write_ptr);
+ fclose(write_ptr);
+
+}
+
+static int test_int(void)
+{
+ reader.pt = w->pt = 0;
+ int num_items = 13;
+ int64_t test_int[] = {-123456789012345678, -1234567890, -12345, -123, -25, -13, 0, 13, 25, 123, 12345, 1234567890, 123456789012345678};
+ byte bin_int[] = {0x8d, 0x3b, 0x1, 0xb6, 0x9b, 0x4b, 0xa6, 0x30, 0xf3, 0x4d, 0x3a, 0x49, 0x96, 0x2, 0xd1, 0x39, 0x30, 0x38, 0x38, 0x7a, 0x38, 0x18, 0x2c, 0x0, 0xd, 0x18, 0x19, 0x18, 0x7b, 0x19, 0x30, 0x39, 0x1a, 0x49, 0x96, 0x2, 0xd2, 0x1b, 0x1, 0xb6, 0x9b, 0x4b, 0xa6, 0x30, 0xf3, 0x4e};
+ cbor_open_list_with_length(w, num_items);
+ for (int i = 0; i < num_items; i++)
+ {
+ cbor_add_int(w, test_int[i]);
+ }
+
+ for (long unsigned int i = 0; i < sizeof(bin_int); i++)
+ {
+ bt_assert((w->cbor[i] & 0xff) == (bin_int[i] & 0xff));
+ }
+
+ struct value val = get_value(&reader);
+  bt_assert(val.major == ARRAY);
+  bt_assert(val.val == num_items);
+ for (int i = 0; i < num_items; i++)
+ {
+ val = get_value(&reader);
+ bt_assert(val.major == NEG_INT || val.major == UINT);
+ bt_assert(val.val == test_int[i]);
+ }
+ return 1;
+}
+
+static int non_aligned_int(void)
+{
+ w->pt = reader.pt = 0;
+ int num_items = 4;
+ cbor_open_list_with_length(w, num_items);
+
+ cbor_add_int(w, 30);
+ w->cbor[w->pt - 1] = 1;
+
+ cbor_add_int(w, 300);
+ w->cbor[w->pt - 2] = 0;
+ w->cbor[w->pt - 1] = 1;
+
+ cbor_add_int(w, 300000000);
+ for (int i = 4; i > 1; i--)
+ {
+ w->cbor[w->pt - i] = 0;
+ }
+ w->cbor[w->pt - 1] = 1;
+
+ cbor_add_int(w, 30000000000000000);
+ for (int i = 8; i > 1; i--)
+ {
+ w->cbor[w->pt - i] = 0;
+ }
+ w->cbor[w->pt - 1] = 1;
+
+ struct value val = get_value(&reader);
+  bt_assert(val.major == ARRAY);
+  bt_assert(val.val == num_items);
+
+ for (int i = 0; i < num_items; i++)
+ {
+ val = get_value(&reader);
+ bt_assert(val.major == UINT);
+ bt_assert(val.val == 1);
+ }
+ return 1;
+}
+
+static int test_majors(void)
+{
+ w->pt = reader.pt = 0;
+ cbor_open_block(w);
+ cbor_open_list_with_length(w, 4);
+ cbor_add_string(w, "b");
+ cbor_add_int(w, 1);
+ cbor_add_int(w, -1);
+ cbor_add_ipv4(w, ip4_build(18, 4, 0, 0));
+ cbor_close_block_or_list(w);
+
+ struct value val = get_value(&reader);
+ bt_assert(val.major == BLOCK);
+ val = get_value(&reader);
+ bt_assert(val.major == ARRAY);
+ val = get_value(&reader);
+ bt_assert(val.major == TEXT);
+ reader.pt += val.val;
+ val = get_value(&reader);
+ bt_assert(val.major == UINT);
+ val = get_value(&reader);
+ bt_assert(val.major == NEG_INT);
+ val = get_value(&reader);
+ bt_assert(val.major == TAG);
+ val = get_value(&reader);
+ bt_assert(val.major == BYTE_STR);
+ reader.pt += val.val;
+ val = get_value(&reader);
+ bt_assert(val_is_break(val));
+ return 1;
+}
+
+int main(int argc, char *argv[])
+{
+ bt_init(argc, argv);
+ byte buff[BUFF_LEN];
+ w = cbor_init(buff, BUFF_LEN, tmp_linpool);
+ reader.buff = buff;
+ reader.size = BUFF_LEN;
+ reader.pt = 0;
+
+ bt_test_suite(test_int, "Adding and reading integer from cbor.");
+  bt_test_suite(non_aligned_int, "Reading non-aligned int from cbor.");
+ bt_test_suite(test_majors, "Test cbor datatypes.");
+
+ return bt_exit_value();
+}
if (l->loop) birdloop_ping(l->loop);
}
-void io_log_event(void *hook, void *data, uint flag);
-
/**
* ev_run_list - run an event list
* @l: an event list
if (!--limit)
return 1;
- /* This is ugly hack, we want to log just events executed from the main I/O loop */
- if ((l == &global_event_list) || (l == &global_work_list))
- io_log_event(e->hook, e->data, DL_EVENTS);
-
edlog(l, e, NULL, 6, EDL_RUN_LIST);
/* Inactivate the event */
event *next = atomic_load_explicit(&e->next, memory_order_relaxed);
[FLOW_TYPE_LABEL] = 4
};
-static u8
+u8
flow_max_value_length(enum flow_type type, int ipv6)
{
return ipv6 ? flow6_max_value_length[type] : flow4_max_value_length[type];
}
-/**
- * flow_check_cf_bmk_values - check value/bitmask part of flowspec component
- * @fb: flow builder instance
- * @neg: negation operand
- * @val: value from value/mask pair
- * @mask: bitmap mask from value/mask pair
- *
- * This function checks value/bitmask pair. If some problem will appear, the
- * function calls cf_error() function with a textual description of reason
- * to failing of validation.
- */
-void
-flow_check_cf_bmk_values(struct flow_builder *fb, u8 neg, u32 val, u32 mask)
-{
- flow_check_cf_value_length(fb, val);
- flow_check_cf_value_length(fb, mask);
-
- if (neg && !(val == 0 || val == mask))
- cf_error("For negation, value must be zero or bitmask");
-
- if ((fb->this_type == FLOW_TYPE_TCP_FLAGS) && (mask & 0xf000))
- cf_error("Invalid mask 0x%x, must not exceed 0xfff", mask);
-
- if ((fb->this_type == FLOW_TYPE_FRAGMENT) && fb->ipv6 && (mask & 0x01))
- cf_error("Invalid mask 0x%x, bit 0 must be 0", mask);
-
- if (val & ~mask)
- cf_error("Value 0x%x outside bitmask 0x%x", val, mask);
-}
-
-/**
- * flow_check_cf_value_length - check value by flowspec component type
- * @fb: flow builder instance
- * @val: value
- *
- * This function checks if the value is in range of component's type support.
- * If some problem will appear, the function calls cf_error() function with
- * a textual description of reason to failing of validation.
- */
-void
-flow_check_cf_value_length(struct flow_builder *fb, u32 val)
-{
- enum flow_type t = fb->this_type;
- u8 max = flow_max_value_length(t, fb->ipv6);
-
- if (t == FLOW_TYPE_DSCP && val > 0x3f)
- cf_error("%s value %u out of range (0-63)", flow_type_str(t, fb->ipv6), val);
-
- if (max == 1 && (val > 0xff))
- cf_error("%s value %u out of range (0-255)", flow_type_str(t, fb->ipv6), val);
-
- if (max == 2 && (val > 0xffff))
- cf_error("%s value %u out of range (0-65535)", flow_type_str(t, fb->ipv6), val);
-}
-
static enum flow_validated_state
flow_validate(const byte *nlri, uint len, int ipv6)
{
cf_error("Invalid flow route: %s", flow_validated_state_str(r));
}
-
/*
* Flowspec Builder
*/
const char *flow_validated_state_str(enum flow_validated_state code);
enum flow_validated_state flow4_validate(const byte *nlri, uint len);
enum flow_validated_state flow6_validate(const byte *nlri, uint len);
-void flow_check_cf_value_length(struct flow_builder *fb, u32 expr);
-void flow_check_cf_bmk_values(struct flow_builder *fb, u8 neg, u32 val, u32 mask);
-void flow4_validate_cf(net_addr_flow4 *f);
-void flow6_validate_cf(net_addr_flow6 *f);
-
+u8 flow_max_value_length(enum flow_type type, int ipv6);
/*
* Net Formatting
#ifndef _BIRD_IO_LOOP_H_
#define _BIRD_IO_LOOP_H_
+extern struct birdloop main_birdloop;
+
#include "nest/bird.h"
#include "lib/lists.h"
#include "lib/locking.h"
#include "lib/resource.h"
+#include "lib/buffer.h"
#include "lib/event.h"
+#include "lib/timer.h"
#include "lib/socket.h"
-extern struct birdloop main_birdloop;
-
/* Currently running birdloop */
extern _Thread_local struct birdloop *this_birdloop;
void birdloop_add_socket(struct birdloop *, struct birdsock *);
void birdloop_remove_socket(struct birdloop *, struct birdsock *);
+/* Initializations */
void birdloop_init(void);
/* Configure threads */
void thread_group_finalize_config(void);
+struct thread_config {
+ uint count;
+};
+
+void bird_thread_commit(struct thread_config *new);
+
+/* Minimalist main */
+void birdloop_minimalist_main(void) NORET;
+
#endif /* _BIRD_IO_LOOP_H_ */
#include "lib/event.h"
#include "lib/locking.h"
+#include "lib/string.h"
#include "lib/tlists.h"
#define TLIST_PREFIX obstacle
/* Allocator of whole pages; for use in slabs and other high-level allocators. */
#define PAGE_HEAD(x) ((void *) (((uintptr_t) (x)) & ~(page_size-1)))
extern long page_size;
-extern _Atomic int pages_kept;
-extern _Atomic int pages_kept_locally;
-extern _Atomic int pages_kept_cold;
-extern _Atomic int pages_kept_cold_index;
+extern _Atomic uint pages_kept;
+extern _Atomic uint pages_kept_locally;
+extern _Atomic uint pages_kept_cold;
+extern _Atomic uint pages_kept_cold_index;
extern _Atomic int pages_total;
extern _Atomic int alloc_locking_in_rcu;
void *alloc_page(void);
void resource_sys_init(void);
+struct alloc_config;
+void alloc_preconfig(struct alloc_config *);
+
#ifdef HAVE_LIBDMALLOC
/*
* The standard dmalloc macros tend to produce lots of namespace
--- /dev/null
+/*
+ * BIRD Internet Routing Daemon -- Global runtime context
+ *
+ * (c) 2024 Maria Matejka <mq@jmq.cz>
+ * (c) 2024 CZ.NIC, z.s.p.o.
+ *
+ * Can be freely distributed and used under the terms of the GNU GPL.
+ */
+
+#include "lib/runtime.h"
+
+int shutting_down = 0;
+
+struct global_runtime global_runtime_initial = {
+ .tf_log = {
+ .fmt1 = "%F %T.%3f",
+ },
+ .tf_base = {
+ .fmt1 = "%F %T.%3f",
+ },
+};
+
+struct global_runtime * _Atomic global_runtime = &global_runtime_initial;
+
+void
+switch_runtime(struct global_runtime *new)
+{
+ new->load_time = current_time();
+ atomic_store_explicit(&global_runtime, new, memory_order_release);
+
+ /* We have to wait until every reader surely doesn't read the old values */
+ synchronize_rcu();
+}
+
--- /dev/null
+/*
+ * BIRD Internet Routing Daemon -- Global runtime context
+ *
+ * (c) 2024 Maria Matejka <mq@jmq.cz>
+ * (c) 2024 CZ.NIC, z.s.p.o.
+ *
+ * Can be freely distributed and used under the terms of the GNU GPL.
+ */
+
+#include "lib/timer.h"
+
+/* Shutdown requested, behave accordingly.
+ * Initially zero, once set to one, never reset. */
+extern int shutting_down;
+
+/* I/O loops log information about task scheduling */
+enum latency_debug_flags {
+ DL_PING = 1,
+ DL_WAKEUP = 2,
+ DL_SCHEDULING = 4,
+ DL_ALLOCATOR = 8,
+ DL_SOCKETS = 0x10,
+ DL_EVENTS = 0x20,
+ DL_TIMERS = 0x40,
+};
+
+struct alloc_config {
+ uint keep_mem_max_global; /* How much free memory is kept hot in total */
+ uint keep_mem_max_local; /* How much free memory is kept hot in every thread */
+ uint at_once; /* How much memory to allocate at once */
+};
+
+#define GLOBAL_RUNTIME_CONTENTS \
+ struct timeformat tf_log; /* Time format for the logfile */ \
+ struct timeformat tf_base; /* Time format for other purposes */ \
+ btime load_time; /* When we reconfigured last time */ \
+ enum latency_debug_flags latency_debug; /* What to log about IO loop */ \
+ u32 latency_limit; /* Events with longer duration are logged (us) */ \
+ u32 watchdog_warning; /* I/O loop watchdog limit for warning (us) */ \
+ const char *hostname; /* Hostname */ \
+ struct alloc_config alloc; /* Allocation settings */ \
+
+struct global_runtime { GLOBAL_RUNTIME_CONTENTS };
+extern struct global_runtime * _Atomic global_runtime;
+
+void switch_runtime(struct global_runtime *);
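A minimal sketch of the expected reader side of switch_runtime(): the atomic load is paired with an RCU read-side section, and anything needed past the unlock is copied out first, because synchronize_rcu() only guarantees that no reader still holds the old pointer afterwards. The helper name and the linpool copy are illustrative only, assuming lib/runtime.h and lib/string.h are included:

static inline const char *
runtime_hostname_sketch(linpool *lp)
{
  rcu_read_lock();
  struct global_runtime *gr = atomic_load_explicit(&global_runtime, memory_order_acquire);

  /* Copy out before unlocking; once switch_runtime() returns, the old
   * structure may be reused or freed by the writer. */
  const char *h = gr->hostname ? lp_strdup(lp, gr->hostname) : NULL;

  rcu_read_unlock();
  return h;
}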
#include <errno.h>
+#include "lib/ip.h"
#include "lib/resource.h"
#include "lib/event.h"
#ifdef HAVE_LIBSSH
uint fast_rx; /* RX has higher priority in event loop */
uint rbsize;
int (*rx_hook)(struct birdsock *, uint size); /* NULL=receiving turned off, returns 1 to clear rx buffer */
+ int (*rx_paused)(struct birdsock *, uint size); /* stored rx_hook when paused */
byte *tbuf, *tpos; /* NULL=allocate automatically */
byte *ttx; /* Internal */
void (*tx_hook)(struct birdsock *);
void (*err_hook)(struct birdsock *, int); /* errno or zero if EOF */
+ void (*err_paused)(struct birdsock *, int); /* called first when paused */
/* Information about received datagrams (UDP, RAW), valid in rx_hook */
ip_addr faddr, laddr; /* src (From) and dst (Local) address of the datagram */
#define sk_new(X) sock_new(X) /* Wrapper to avoid name collision with OpenSSL */
int sk_open(sock *, struct birdloop *); /* Open socket */
+int sk_open_unix(struct birdsock *s, struct birdloop *, const char *name); /* Open UNIX socket */
void sk_reloop(sock *, struct birdloop *); /* Move socket to another loop. Both loops must be locked. */
static inline void sk_close(sock *s) { rfree(&s->r); } /* Explicitly close socket */
int sk_send_to(sock *, uint len, ip_addr to, uint port); /* sk_send to given destination */
void sk_reallocate(sock *); /* Free and allocate tbuf & rbuf */
void sk_pause_rx(struct birdloop *loop, sock *s);
-void sk_resume_rx(struct birdloop *loop, sock *s, int (*hook)(sock *, uint));
+void sk_resume_rx(struct birdloop *loop, sock *s);
void sk_set_rbsize(sock *s, uint val); /* Resize RX buffer */
void sk_set_tbsize(sock *s, uint val); /* Resize TX buffer, keeping content */
void sk_set_tbuf(sock *s, void *tbuf); /* Switch TX buffer, NULL-> return to internal */
#define tmp_sprintf(...) lp_sprintf(tmp_linpool, __VA_ARGS__)
#define tmp_vsprintf(...) lp_vsprintf(tmp_linpool, __VA_ARGS__)
+#define tmp_sprintf(...) lp_sprintf(tmp_linpool, __VA_ARGS__)
+#define tmp_vsprintf(...) lp_vsprintf(tmp_linpool, __VA_ARGS__)
+
int buffer_vprint(buffer *buf, const char *fmt, va_list args);
int buffer_print(buffer *buf, const char *fmt, ...);
void buffer_puts(buffer *buf, const char *str);
return z;
}
+#define tmp_strdup(x) lp_strdup(tmp_linpool, (x))
+
+static inline char *
+mb_strdup(pool *p, const char *c)
+{
+ size_t l = strlen(c) + 1;
+ char *z = mb_alloc(p, l);
+ memcpy(z, c, l);
+ return z;
+}
+
static inline void
memset32(void *D, u32 val, uint n)
{
BUFFER_PUSH(loop->timers) = NULL;
}
-void io_log_event(void *hook, void *data, uint flag);
-
void
-timers_fire(struct timeloop *loop, int io_log)
+timers_fire(struct timeloop *loop)
{
TLOCK_TIMER_ASSERT(loop);
else
tm_stop(t);
- /* This is ugly hack, we want to log just timers executed from the main I/O loop */
- if (io_log)
- io_log_event(t->hook, t->data, DL_TIMERS);
-
t->hook(t);
tmp_flush();
}
/* For I/O loop */
void timers_init(struct timeloop *loop, pool *p);
-void timers_fire(struct timeloop *loop, int io_log);
+void timers_fire(struct timeloop *loop);
/* For extra fine precision */
u64 ns_now(void);
+#define NSEC_IN_SEC ((u64) (1000 * 1000 * 1000))
+#define NSEC_TO_SEC(x) ((x) / NSEC_IN_SEC)
+#define CURRENT_SEC NSEC_TO_SEC(ns_now())
+
struct timeformat {
const char *fmt1, *fmt2;
btime limit;
#ifndef HAVE_LIBDMALLOC
-#if DEBUGGING
+#if DEBUG_ALLOCATOR
struct minfo {
void *ptr;
uint size;
-src := cli.c cmds.c iface.c locks.c mpls.c neighbor.c password.c proto.c proto-build.c rt-attr.c rt-dev.c rt-export.c rt-fib.c rt-show.c rt-table.c
+src := iface.c neighbor.c
+obj := $(src-o-files)
+$(all-lib)
+
+src := cli.c cmds.c iface-cli.c locks.c mpls.c password.c proto.c proto-build.c rt-attr.c rt-dev.c rt-export.c rt-fib.c rt-show.c rt-table.c
obj := $(src-o-files)
$(all-daemon)
$(cf-local)
struct config f;
int res;
- if (OBSREF_GET(config)->cli_debug > 1)
+ if (BIRD_GLOBAL_RUNTIME->cli_debug > 1)
log(L_TRACE "CLI: %s", c->rx_buf);
bzero(&f, sizeof(f));
f.mem = c->parser_pool;
cmd_show_status(void)
{
rcu_read_lock();
- struct global_runtime *gr = atomic_load_explicit(&global_runtime, memory_order_acquire);
+ union bird_global_runtime *gr = BIRD_GLOBAL_RUNTIME;
struct timeformat *tf = this_cli->tf ?: &gr->tf_base;
byte tim[TM_DATETIME_BUFFER_SIZE];
conf: rtrid ;
rtrid:
- ROUTER ID idval ';' { new_config->router_id = $3; }
+ ROUTER ID idval ';' { new_config->runtime.router_id = $3; }
| ROUTER ID FROM iface_patt ';' { new_config->router_id_from = this_ipatt; }
;
conf: hostname_override ;
-hostname_override: HOSTNAME text ';' { new_config->hostname = $2; } ;
+hostname_override: HOSTNAME text ';' { new_config->runtime.hostname = $2; } ;
conf: gr_opts ;
-gr_opts: GRACEFUL RESTART WAIT expr ';' { new_config->gr_wait = $4; } ;
+gr_opts: GRACEFUL RESTART WAIT expr ';' { new_config->runtime.gr_wait = $4; } ;
/* Network types (for tables, channels) */
DEBUG PROTOCOLS debug_mask { new_config->proto_default_debug = $3; }
| DEBUG CHANNELS debug_mask { new_config->channel_default_debug = $3; }
| DEBUG TABLES debug_mask { new_config->table_default_debug = $3; }
- | DEBUG COMMANDS expr { new_config->cli_debug = $3; }
+ | DEBUG COMMANDS expr { new_config->runtime.cli_debug = $3; }
| DEBUG SHOW ROUTE debug_mask { new_config->show_route_debug = $4; }
;
;
timeformat_which:
- ROUTE { $$ = &new_config->tf_route; }
- | PROTOCOL { $$ = &new_config->tf_proto; }
- | BASE { $$ = &new_config->tf_base; }
- | LOG { $$ = &new_config->tf_log; }
+ ROUTE { $$ = &new_config->runtime.tf_route; }
+ | PROTOCOL { $$ = &new_config->runtime.tf_proto; }
+ | BASE { $$ = &new_config->runtime.tf_base; }
+ | LOG { $$ = &new_config->runtime.tf_log; }
;
timeformat_spec:
init_list(&($$->tables));
$$->filter = FILTER_ACCEPT;
$$->cli = this_cli;
- $$->tf_route = this_cli->main_config->tf_route;
+ $$->tf_route = this_cli->main_config->runtime.tf_route;
}
| r_args net_any {
$$ = $1;
CF_CLI(DUMP SOCKETS, text,, [[Dump open sockets]])
{ cmd_dump_file(this_cli, $3, "sockets", sk_dump_all); } ;
CF_CLI(DUMP EVENTS, text,, [[Dump event log]])
-{ cmd_dump_file(this_cli, $3, "event log", io_log_dump); } ;
+{ /* cmd_dump_file(this_cli, $3, "event log", io_log_dump); */ cli_msg(0, "Warning: this command did nothing and we still need to figure out how to reimplement it properly."); } ;
CF_CLI(DUMP INTERFACES, text,, [[Dump interface information]])
{ cmd_dump_file(this_cli, $3, "interfaces", if_dump_all); } ;
CF_CLI(DUMP NEIGHBORS, text,, [[Dump neighbor cache]])
--- /dev/null
+/*
+ * BIRD -- Management of Interfaces and Neighbor Cache
+ *
+ * (c) 1998--2000 Martin Mares <mj@ucw.cz>
+ *
+ * Can be freely distributed and used under the terms of the GNU GPL.
+ */
+
+#include "nest/bird.h"
+#include "nest/iface.h"
+#include "nest/protocol.h"
+#include "nest/cli.h"
+#include "lib/resource.h"
+#include "lib/string.h"
+#include "lib/locking.h"
+
+/*
+ * CLI commands.
+ */
+
+static void
+if_show_addr(struct ifa *a)
+{
+ byte *flg, opp[IPA_MAX_TEXT_LENGTH + 16];
+
+ flg = (a->flags & IA_PRIMARY) ? "Preferred, " : (a->flags & IA_SECONDARY) ? "Secondary, " : "";
+
+ if (ipa_nonzero(a->opposite))
+ bsprintf(opp, "opposite %I, ", a->opposite);
+ else
+ opp[0] = 0;
+
+ cli_msg(-1003, "\t%I/%d (%s%sscope %s)",
+ a->ip, a->prefix.pxlen, flg, opp, ip_scope_text(a->scope));
+}
+
+void
+if_show(void)
+{
+ struct ifa *a;
+ char *type;
+
+ IFACE_WALK(i)
+ {
+ if (i->flags & IF_SHUTDOWN)
+ continue;
+
+ char mbuf[16 + sizeof(i->name)] = {};
+ if (i->master != &default_vrf)
+ bsprintf(mbuf, " master=%s", i->master->name);
+ else if (i->master_index)
+ bsprintf(mbuf, " master=#%u", i->master_index);
+
+ cli_msg(-1001, "%s %s (index=%d%s)", i->name, (i->flags & IF_UP) ? "up" : "down", i->index, mbuf);
+ if (!(i->flags & IF_MULTIACCESS))
+ type = "PtP";
+ else
+ type = "MultiAccess";
+ cli_msg(-1004, "\t%s%s%s Admin%s Link%s%s%s MTU=%d",
+ type,
+ (i->flags & IF_BROADCAST) ? " Broadcast" : "",
+ (i->flags & IF_MULTICAST) ? " Multicast" : "",
+ (i->flags & IF_ADMIN_UP) ? "Up" : "Down",
+ (i->flags & IF_LINK_UP) ? "Up" : "Down",
+ (i->flags & IF_LOOPBACK) ? " Loopback" : "",
+ (i->flags & IF_IGNORE) ? " Ignored" : "",
+ i->mtu);
+
+ WALK_LIST(a, i->addrs)
+ if (a->prefix.type == NET_IP4)
+ if_show_addr(a);
+
+ WALK_LIST(a, i->addrs)
+ if (a->prefix.type == NET_IP6)
+ if_show_addr(a);
+ }
+ cli_msg(0, "");
+}
+
+void
+if_show_summary(void)
+{
+ cli_msg(-2005, "%-10s %-6s %-18s %s", "Interface", "State", "IPv4 address", "IPv6 address");
+ IFACE_WALK(i)
+ {
+ byte a4[IPA_MAX_TEXT_LENGTH + 17];
+ byte a6[IPA_MAX_TEXT_LENGTH + 17];
+
+ if (i->flags & IF_SHUTDOWN)
+ continue;
+
+ if (i->addr4)
+ bsprintf(a4, "%I/%d", i->addr4->ip, i->addr4->prefix.pxlen);
+ else
+ a4[0] = 0;
+
+ if (i->addr6)
+ bsprintf(a6, "%I/%d", i->addr6->ip, i->addr6->prefix.pxlen);
+ else
+ a6[0] = 0;
+
+ cli_msg(-1005, "%-10s %-6s %-18s %s",
+ i->name, (i->flags & IF_UP) ? "up" : "down", a4, a6);
+ }
+ cli_msg(0, "");
+}
#include "nest/bird.h"
#include "nest/iface.h"
#include "nest/protocol.h"
-#include "nest/cli.h"
#include "lib/resource.h"
#include "lib/string.h"
#include "lib/locking.h"
#include "conf/conf.h"
-#include "sysdep/unix/krt.h"
DOMAIN(attrs) iface_domain;
IFACE_WALK(i)
if_dump_locked(dreq, i);
rcu_read_lock();
- RDUMP("Router ID: %08x\n", atomic_load_explicit(&global_runtime, memory_order_relaxed)->router_id);
+ RDUMP("Router ID: %08x\n", BIRD_GLOBAL_RUNTIME->router_id);
rcu_read_unlock();
}
*pos = new;
}
+int (*kif_update_sysdep_addr)(struct iface *) = NULL;
+
static void
if_recalc_preferred(struct iface *i)
{
* 2) Sysdep IPv4 address (BSD)
* 3) Old preferred address
* 4) First address in list
- */
+ */
- struct kif_iface_config *ic = kif_get_iface_config(i);
+ struct iface_config *ic = i->cf;
struct ifa *a4 = i->addr4, *a6 = i->addr6, *ll = i->llv6;
ip_addr pref_v4 = ic->pref_v4;
uint change = 0;
- if (kif_update_sysdep_addr(i))
+ if (kif_update_sysdep_addr && kif_update_sysdep_addr(i))
change |= IF_CHANGE_SYSDEP;
/* BSD sysdep address */
}
return (!x->n.next && !y->n.next);
}
-
-/*
- * CLI commands.
- */
-
-static void
-if_show_addr(struct ifa *a)
-{
- byte *flg, opp[IPA_MAX_TEXT_LENGTH + 16];
-
- flg = (a->flags & IA_PRIMARY) ? "Preferred, " : (a->flags & IA_SECONDARY) ? "Secondary, " : "";
-
- if (ipa_nonzero(a->opposite))
- bsprintf(opp, "opposite %I, ", a->opposite);
- else
- opp[0] = 0;
-
- cli_msg(-1003, "\t%I/%d (%s%sscope %s)",
- a->ip, a->prefix.pxlen, flg, opp, ip_scope_text(a->scope));
-}
-
-void
-if_show(void)
-{
- struct ifa *a;
- char *type;
-
- IFACE_WALK(i)
- {
- if (i->flags & IF_SHUTDOWN)
- continue;
-
- char mbuf[16 + sizeof(i->name)] = {};
- if (i->master != &default_vrf)
- bsprintf(mbuf, " master=%s", i->master->name);
- else if (i->master_index)
- bsprintf(mbuf, " master=#%u", i->master_index);
-
- cli_msg(-1001, "%s %s (index=%d%s)", i->name, (i->flags & IF_UP) ? "up" : "down", i->index, mbuf);
- if (!(i->flags & IF_MULTIACCESS))
- type = "PtP";
- else
- type = "MultiAccess";
- cli_msg(-1004, "\t%s%s%s Admin%s Link%s%s%s MTU=%d",
- type,
- (i->flags & IF_BROADCAST) ? " Broadcast" : "",
- (i->flags & IF_MULTICAST) ? " Multicast" : "",
- (i->flags & IF_ADMIN_UP) ? "Up" : "Down",
- (i->flags & IF_LINK_UP) ? "Up" : "Down",
- (i->flags & IF_LOOPBACK) ? " Loopback" : "",
- (i->flags & IF_IGNORE) ? " Ignored" : "",
- i->mtu);
-
- WALK_LIST(a, i->addrs)
- if (a->prefix.type == NET_IP4)
- if_show_addr(a);
-
- WALK_LIST(a, i->addrs)
- if (a->prefix.type == NET_IP6)
- if_show_addr(a);
- }
- cli_msg(0, "");
-}
-
-void
-if_show_summary(void)
-{
- cli_msg(-2005, "%-10s %-6s %-18s %s", "Interface", "State", "IPv4 address", "IPv6 address");
- IFACE_WALK(i)
- {
- byte a4[IPA_MAX_TEXT_LENGTH + 17];
- byte a6[IPA_MAX_TEXT_LENGTH + 17];
-
- if (i->flags & IF_SHUTDOWN)
- continue;
-
- if (i->addr4)
- bsprintf(a4, "%I/%d", i->addr4->ip, i->addr4->prefix.pxlen);
- else
- a4[0] = 0;
-
- if (i->addr6)
- bsprintf(a6, "%I/%d", i->addr6->ip, i->addr6->prefix.pxlen);
- else
- a6[0] = 0;
-
- cli_msg(-1005, "%-10s %-6s %-18s %s",
- i->name, (i->flags & IF_UP) ? "up" : "down", a4, a6);
- }
- cli_msg(0, "");
-}
struct ifa *addr6; /* Primary address for IPv6 */
struct ifa *llv6; /* Primary link-local address for IPv6 */
ip4_addr sysdep; /* Arbitrary IPv4 address for internal sysdep use */
+ struct iface_config *cf; /* Attached configuration */
list neighbors; /* All neighbors on this interface */
unsigned uc; /* Use (link) count */
};
+/* Sysdep address updater hook; registered by kif_sys_start() on platforms that need it (BSD), otherwise stays NULL */
+extern int (*kif_update_sysdep_addr)(struct iface *);
+
#define IF_UP 1 /* Currently just IF_ADMIN_UP */
#define IF_MULTIACCESS 2
#define IF_BROADCAST 4
struct iface_patt *iface_patt_find(list *l, struct iface *i, struct ifa *a);
int iface_patts_equal(list *, list *, int (*)(struct iface_patt *, struct iface_patt *));
+/* Basic interface configuration */
+struct iface_config {
+ struct iface_patt i;
+
+ ip_addr pref_v4;
+ ip_addr pref_v6;
+ ip_addr pref_ll;
+};
+
u32 if_choose_router_id(struct iface_patt *mask, u32 old_id);
global_commit() because it is postponed after start of device protocol */
if ((phase == PROTOCOL_STARTUP_NECESSARY) && !old)
{
- struct global_runtime *gr = atomic_load_explicit(&global_runtime, memory_order_relaxed);
+ union bird_global_runtime *gr = BIRD_GLOBAL_RUNTIME;
if (!gr->router_id)
{
gr->router_id = if_choose_router_id(new->router_id_from, 0);
_graceful_recovery_context.grc_state = GRS_ACTIVE;
_graceful_recovery_context.wait_timer = (timer) { .hook = graceful_recovery_timeout };
- u32 gr_wait = atomic_load_explicit(&global_runtime, memory_order_relaxed)->gr_wait;
+ u32 gr_wait = BIRD_GLOBAL_RUNTIME->gr_wait;
tm_start(&_graceful_recovery_context.wait_timer, gr_wait S);
callback_init(&_graceful_recovery_context.obstacles_cleared, graceful_recovery_done, &main_birdloop);
obstacle_target_count(&_graceful_recovery_context.obstacles));
cli_msg(-24, " Wait timer is %t/%u",
tm_remains(&_graceful_recovery_context.wait_timer),
- atomic_load_explicit(&global_runtime, memory_order_relaxed)->gr_wait);
+ BIRD_GLOBAL_RUNTIME->gr_wait);
}
/**
if (p->proto->get_status)
p->proto->get_status(p, buf);
- rcu_read_lock();
- struct timeformat *tf = this_cli->tf ?: &atomic_load_explicit(&global_runtime, memory_order_acquire)->tf_proto;
- rcu_read_unlock();
+ struct timeformat *tf = this_cli->tf;
+  if (tf)
+    tm_format_time(tbuf, tf, p->last_state_change);
+ else
+ {
+ rcu_read_lock();
+ tm_format_time(tbuf, &BIRD_GLOBAL_RUNTIME->tf_proto, p->last_state_change);
+ rcu_read_unlock();
+ }
- tm_format_time(tbuf, tf, p->last_state_change);
cli_msg(-1002, "%-10s %-10s %-10s %-6s %-12s %s",
p->name,
p->proto->name,
static inline u32
proto_get_router_id(struct proto_config *pc)
{
- return pc->router_id ?: atomic_load_explicit(&global_runtime, memory_order_relaxed)->router_id;
+ return pc->router_id ?: BIRD_GLOBAL_RUNTIME->router_id;
}
rcu_read_lock();
struct global_runtime *gr = atomic_load_explicit(&global_runtime, memory_order_relaxed);
- tm_format_time(tbuf, this_cli->tf ?: &gr->tf_proto,
+ tm_format_time(tbuf, this_cli->tf ?: &gr->tf_log,
atomic_load_explicit(&s->last_state_change, memory_order_relaxed));
rcu_read_unlock();
caps->llgr_aware = 1;
rcu_read_lock();
- struct global_runtime *gr = atomic_load_explicit(&global_runtime, memory_order_relaxed);
+ union bird_global_runtime *gr = BIRD_GLOBAL_RUNTIME;
if (p->cf->enable_hostname && gr->hostname)
{
size_t length = strlen(gr->hostname);
{
struct birdsock *sk = p->conn->sk;
ASSERT_DIE(sk->rpos > sk->rbuf);
- sk_resume_rx(p->p.loop, sk, bgp_rx);
+ sk_resume_rx(p->p.loop, sk);
bgp_rx(sk, sk->rpos - sk->rbuf);
BGP_TRACE(D_PACKETS, "Uncorked");
}
mrt_peer_table_dump(struct mrt_table_dump_state *s)
{
mrt_init_message(&s->buf, MRT_TABLE_DUMP_V2, MRT_PEER_INDEX_TABLE);
- mrt_peer_table_header(s, OBSREF_GET(config)->router_id, s->table_open->name);
+
+ rcu_read_lock();
+ mrt_peer_table_header(s, BIRD_GLOBAL_RUNTIME->router_id, s->table_open->name);
+ rcu_read_unlock();
/* 0 is fake peer for non-BGP routes */
mrt_peer_table_entry(s, 0, 0, IPA_NONE);
if (fl & IFF_MULTICAST)
f.flags |= IF_MULTICAST;
+ f.cf = kif_get_iface_config(&f);
+
iface = if_update(&f);
if (!scan)
/* KIF misc code */
-void
-kif_sys_start(struct kif_proto *p UNUSED)
-{
-}
-
void
kif_sys_shutdown(struct kif_proto *p)
{
krt_buffer_release(&p->p);
}
-int
-kif_update_sysdep_addr(struct iface *i)
+static int
+kif_update_sysdep_addr_(struct iface *i)
{
static int fd = -1;
return !ip4_equal(i->sysdep, old);
}
+
+void
+kif_sys_start(struct kif_proto *p UNUSED)
+{
+ /* Setup sysdep address updater */
+ kif_update_sysdep_addr = kif_update_sysdep_addr_;
+}
if (kind && !strcmp(kind, "vrf"))
f.flags |= IF_VRF;
+ f.cf = kif_get_iface_config(&f);
+
ifi = if_update(&f);
if (!scan)
if (f.master != i->master)
{
+ f.cf = kif_get_iface_config(&f);
+
memcpy(f.name, i->name, sizeof(f.name));
if_update_locked(&f);
}
kif_sys_shutdown(struct kif_proto *p UNUSED)
{
}
-
-int
-kif_update_sysdep_addr(struct iface *i UNUSED)
-{
- return 0;
-}
-src := alloc.c io.c io-loop.c krt.c log.c main.c random.c domain.c
+src := alloc.c domain.c file.c io-loop.c log.c socket.c random.c time.c
+obj := $(src-o-files)
+$(all-lib)
+
+src := io.c io-cli.c krt.c main.c
obj := $(src-o-files)
$(all-daemon)
$(cf-local)
long page_size = 0;
#ifdef HAVE_MMAP
-# define KEEP_PAGES_MAX 16384
-# define KEEP_PAGES_MIN 32
-# define KEEP_PAGES_MAX_LOCAL 128
-# define ALLOC_PAGES_AT_ONCE 32
- STATIC_ASSERT(KEEP_PAGES_MIN * 4 < KEEP_PAGES_MAX);
- STATIC_ASSERT(ALLOC_PAGES_AT_ONCE < KEEP_PAGES_MAX_LOCAL);
+void
+alloc_preconfig(struct alloc_config *ac)
+{
+ ac->keep_mem_max_global = 16777216;
+ ac->keep_mem_max_local = 524288;
+ ac->at_once = 131072;
+}
+
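+/* The page-cache limits are no longer compile-time constants; they are read from the alloc part of the current global_runtime */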
+# define ALLOC_INFO (&(atomic_load_explicit(&global_runtime, memory_order_relaxed)->alloc))
+# define KEEP_MEM_MAX ALLOC_INFO->keep_mem_max_global
+# define KEEP_MEM_MAX_LOCAL ALLOC_INFO->keep_mem_max_local
+# define ALLOC_MEM_AT_ONCE ALLOC_INFO->at_once
static bool use_fake = 0;
static bool initialized = 0;
# define PROTECT_PAGE(pg)
# define UNPROTECT_PAGE(pg)
-# if DEBUGGING
+# if DEBUG_ALLOCATOR
# ifdef ENABLE_EXPENSIVE_CHECKS
# undef PROTECT_PAGE
# undef UNPROTECT_PAGE
static DOMAIN(resource) empty_pages_domain;
static struct empty_pages *empty_pages = NULL;
- _Atomic int pages_kept_cold = 0;
- _Atomic int pages_kept_cold_index = 0;
+ _Atomic uint pages_kept_cold = 0;
+ _Atomic uint pages_kept_cold_index = 0;
_Atomic int pages_total = 0;
_Atomic int alloc_locking_in_rcu = 0;
static event page_cleanup_event = { .hook = page_cleanup, };
# define SCHEDULE_CLEANUP do if (initialized && !shutting_down) ev_send(&global_event_list, &page_cleanup_event); while (0)
- _Atomic int pages_kept = 0;
- _Atomic int pages_kept_locally = 0;
- static _Thread_local int pages_kept_here = 0;
+ _Atomic uint pages_kept = 0;
+ _Atomic uint pages_kept_locally = 0;
+ static _Thread_local uint pages_kept_here = 0;
static void *
alloc_sys_page(void)
{
- void *ptr = mmap(NULL, page_size * ALLOC_PAGES_AT_ONCE, PROT_WRITE | PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ void *ptr = mmap(NULL, ALLOC_MEM_AT_ONCE, PROT_WRITE | PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
if (ptr == MAP_FAILED)
die("mmap(%ld) failed: %m", (s64) page_size);
- atomic_fetch_add_explicit(&pages_total, ALLOC_PAGES_AT_ONCE, memory_order_acq_rel);
+ ASSUME(ALLOC_MEM_AT_ONCE % page_size == 0);
+ atomic_fetch_add_explicit(&pages_total, ALLOC_MEM_AT_ONCE / page_size, memory_order_acq_rel);
return ptr;
}
void *ptr = alloc_sys_page();
ajlog(ptr, NULL, 0, AJT_ALLOC_MMAP);
- for (int i=1; i<ALLOC_PAGES_AT_ONCE; i++)
- free_page(ptr + page_size * i);
+ for (unsigned long skip = page_size; skip<ALLOC_MEM_AT_ONCE; skip += page_size)
+ free_page(ptr + skip);
return ptr;
#endif
#ifdef HAVE_MMAP
/* We primarily try to keep the pages locally. */
struct free_page *fp = ptr;
- if (pages_kept_here < KEEP_PAGES_MAX_LOCAL)
+ if (pages_kept_here * page_size < KEEP_MEM_MAX_LOCAL)
{
struct free_page *next = local_page_stack;
atomic_store_explicit(&fp->next, next, memory_order_relaxed);
ajlog(fp, next, pk, AJT_FREE_GLOBAL_HOT);
/* And if there are too many global hot free pages, we ask for page cleanup */
- if (pk >= KEEP_PAGES_MAX)
+ if (pk * page_size >= KEEP_MEM_MAX)
SCHEDULE_CLEANUP;
#endif
}
/* We first count the pages to enable consistency checking.
* Also, we need to know the last page. */
struct free_page *last = local_page_stack, *next;
- int check_count = 1;
+ uint check_count = 1;
while (next = atomic_load_explicit(&last->next, memory_order_relaxed))
{
check_count++;
/* Check the state of global page cache and maybe schedule its cleanup. */
atomic_fetch_sub_explicit(&pages_kept_locally, check_count, memory_order_relaxed);
- if (atomic_fetch_add_explicit(&pages_kept, check_count, memory_order_relaxed) >= KEEP_PAGES_MAX)
+ if (atomic_fetch_add_explicit(&pages_kept, check_count, memory_order_relaxed) * page_size >= KEEP_MEM_MAX)
SCHEDULE_CLEANUP;
}
/* Pages allocated inbetween */
uint pk = atomic_load_explicit(&pages_kept, memory_order_relaxed);
- if (pk < KEEP_PAGES_MAX)
+ if (pk * page_size < KEEP_MEM_MAX)
return;
/* Walk the pages */
UNLOCK_DOMAIN(resource, empty_pages_domain);
count++;
}
- while (atomic_fetch_sub_explicit(&pages_kept, 1, memory_order_relaxed) >= KEEP_PAGES_MAX / 2);
+ while (atomic_fetch_sub_explicit(&pages_kept, 1, memory_order_relaxed) * page_size >= KEEP_MEM_MAX / 2);
ALLOC_TRACE("Moved %u pages to cold storage, now %u cold, %u index", count,
atomic_load_explicit(&pages_kept_cold, memory_order_relaxed),
/* We assume that page size has only one bit and is between 1K and 256K (incl.).
* Otherwise, the assumptions in lib/slab.c (sl_head's num_full range) aren't met. */
+ alloc_preconfig(&(atomic_load_explicit(&global_runtime, memory_order_relaxed)->alloc));
+
empty_pages_domain = DOMAIN_NEW(resource);
DOMAIN_SETUP(resource, empty_pages_domain, "Empty Pages", NULL);
initialized = 1;
+
return;
}
#endif
page_size = 4096;
+ alloc_preconfig(&(atomic_load_explicit(&global_runtime, memory_order_relaxed)->alloc));
+
initialized = 1;
}
CF_KEYWORDS(PING, WAKEUP, SOCKETS, SCHEDULING, EVENTS, TIMERS, ALLOCATOR)
CF_KEYWORDS(GRACEFUL, RESTART, FIXED)
CF_KEYWORDS(THREAD, THREADS, GROUP, MIN, MAX, TIME, LATENCY, DEFAULT)
+CF_KEYWORDS(MEMORY, GLOBAL, LOCAL, KEEP, HOT, ALLOCATE, BLOCK)
%type <i> log_mask log_mask_list log_cat cfg_timeout debug_unix latency_debug_mask latency_debug_flag latency_debug_list
%type <t> cfg_name
conf: debug_unix ;
debug_unix:
- DEBUG LATENCY latency_debug_mask { new_config->latency_debug = $3; }
- | DEBUG LATENCY LIMIT expr_us { new_config->latency_limit = $4; }
- | WATCHDOG WARNING expr_us { new_config->watchdog_warning = $3; }
- | WATCHDOG TIMEOUT expr_us { new_config->watchdog_timeout = ($3 + 999999) TO_S; }
+ DEBUG LATENCY latency_debug_mask { new_config->runtime.latency_debug = $3; }
+ | DEBUG LATENCY LIMIT expr_us { new_config->runtime.latency_limit = $4; }
+ | WATCHDOG WARNING expr_us { new_config->runtime.watchdog_warning = $3; }
+ | WATCHDOG TIMEOUT expr_us { new_config->runtime.watchdog_timeout = ($3 + 999999) TO_S; }
;
latency_debug_mask:
| TIMERS { $$ = DL_TIMERS; }
;
+conf: MEMORY '{' memory_items '}'
+{
+ if (new_config->runtime.alloc.keep_mem_max_global <= new_config->runtime.alloc.keep_mem_max_local)
+ cf_error("Global (%u) hot memory limit must be higher than local (%u)",
+ new_config->runtime.alloc.keep_mem_max_global,
+ new_config->runtime.alloc.keep_mem_max_local);
+
+ if (new_config->runtime.alloc.keep_mem_max_local < new_config->runtime.alloc.at_once)
+ cf_error("Can't allocate more memory at once (%u) than local hot limit (%u)",
+ new_config->runtime.alloc.at_once,
+ new_config->runtime.alloc.keep_mem_max_local);
+}
+
+memory_items:
+ | memory_items GLOBAL KEEP HOT NUM ';' {
+ new_config->runtime.alloc.keep_mem_max_global = BIRD_ALIGN($5, page_size); }
+ | memory_items LOCAL KEEP HOT NUM ';' {
+ new_config->runtime.alloc.keep_mem_max_local = BIRD_ALIGN($5, page_size); }
+ | memory_items ALLOCATE BLOCK NUM ';' {
+ new_config->runtime.alloc.at_once = BIRD_ALIGN($4, page_size); }
+;
+
/* Unix specific commands */
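For reference, a configuration block accepted by the grammar above would read `memory { global keep hot 16777216; local keep hot 524288; allocate block 131072; }`; the numbers here simply mirror the alloc_preconfig() defaults set elsewhere in this patch, each value is rounded to a page_size multiple by BIRD_ALIGN(), and the two cf_error() checks enforce that the global hot limit exceeds the local one and that the local limit is at least the allocation block size.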
--- /dev/null
+/*
+ * BIRD Internet Routing Daemon -- Tracked Files
+ *
+ * (c) 1998--2004 Martin Mares <mj@ucw.cz>
+ * (c) 2004 Ondrej Filip <feela@network.cz>
+ *
+ * Can be freely distributed and used under the terms of the GNU GPL.
+ */
+
+/* Unfortunately, some glibc versions hide parts of RFC 3542 API
+ if _GNU_SOURCE is not defined. */
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+#include <sys/mman.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/socket.h>
+#include <sys/uio.h>
+#include <sys/un.h>
+#include <poll.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <net/if.h>
+#include <netinet/in.h>
+#include <netinet/tcp.h>
+#include <netinet/udp.h>
+#include <netinet/icmp6.h>
+#include <netdb.h>
+
+#include "nest/bird.h"
+#include "lib/lists.h"
+#include "lib/resource.h"
+#include "lib/socket.h"
+#include "lib/event.h"
+#include "lib/locking.h"
+#include "lib/timer.h"
+#include "lib/string.h"
+#include "nest/cli.h"
+#include "nest/iface.h"
+#include "conf/conf.h"
+
+#include "sysdep/unix/unix.h"
+#include "sysdep/unix/io-loop.h"
+
+/* Maximum number of calls of tx handler for one socket in one
+ * poll iteration. Should be small enough to not monopolize CPU by
+ * one protocol instance.
+ */
+#define MAX_STEPS 4
+
+/* Maximum number of calls of rx handler for all sockets in one poll
+   iteration. RX callbacks are often much more costly, so we limit
+   this to keep latencies small. */
+#define MAX_RX_STEPS 4
+
+
+/*
+ * Tracked Files
+ */
+
+struct rfile {
+ resource r;
+ struct stat stat;
+ int fd;
+ off_t limit;
+ _Atomic off_t pos;
+ void *mapping;
+};
+
+struct rfile rf_stderr = {
+ .fd = 2,
+};
+
+static void
+rf_free(resource *r)
+{
+ struct rfile *a = (struct rfile *) r;
+
+ if (a->mapping)
+ munmap(a->mapping, a->limit);
+
+ close(a->fd);
+}
+
+static void
+rf_dump(struct dump_request *dreq, resource *r)
+{
+ struct rfile *a = (struct rfile *) r;
+
+ RDUMP("(fd %d)\n", a->fd);
+}
+
+static struct resclass rf_class = {
+ "FILE",
+ sizeof(struct rfile),
+ rf_free,
+ rf_dump,
+ NULL,
+ NULL
+};
+
+int
+rf_fileno(struct rfile *f)
+{
+ return f->fd;
+}
+
+static int
+rf_open_get_fd(const char *name, enum rf_mode mode)
+{
+ int omode = S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH;
+ int flags;
+
+ switch (mode)
+ {
+ case RF_APPEND:
+ flags = O_WRONLY | O_CREAT | O_APPEND;
+ break;
+
+ case RF_FIXED:
+ flags = O_RDWR | O_CREAT;
+ break;
+
+ default:
+ bug("rf_open() must have the mode set");
+ }
+
+ return open(name, flags, omode);
+}
+
+static void
+rf_stat(struct rfile *r)
+{
+ if (fstat(r->fd, &r->stat) < 0)
+ die("fstat() failed: %m");
+}
+
+struct rfile *
+rf_open(pool *p, const char *name, enum rf_mode mode, off_t limit)
+{
+ int fd = rf_open_get_fd(name, mode);
+ if (fd < 0)
+ return NULL; /* The caller takes care of printing %m. */
+
+ struct rfile *r = ralloc(p, &rf_class);
+ r->fd = fd;
+ r->limit = limit;
+
+ switch (mode)
+ {
+ case RF_APPEND:
+ rf_stat(r);
+ atomic_store_explicit(&r->pos, S_ISREG(r->stat.st_mode) ? r->stat.st_size : 0, memory_order_relaxed);
+ break;
+
+ case RF_FIXED:
+ if ((ftruncate(fd, limit) < 0)
+ || ((r->mapping = mmap(NULL, limit, PROT_WRITE, MAP_SHARED, fd, 0)) == MAP_FAILED))
+ {
+ int erf = errno;
+ r->mapping = NULL;
+ rfree(r);
+ errno = erf;
+ return NULL;
+ }
+ break;
+
+ default:
+ bug("rf_open() must have the mode set");
+ }
+
+
+ return r;
+}
+
+off_t
+rf_size(struct rfile *r)
+{
+ return atomic_load_explicit(&r->pos, memory_order_relaxed);
+}
+
+int
+rf_same(struct rfile *a, struct rfile *b)
+{
+ rf_stat(a);
+ rf_stat(b);
+
+ return
+ (a->limit == b->limit) &&
+ (a->stat.st_mode == b->stat.st_mode) &&
+ (a->stat.st_dev == b->stat.st_dev) &&
+ (a->stat.st_ino == b->stat.st_ino);
+}
+
+void
+rf_write_crude(struct rfile *r, const char *buf, int sz)
+{
+ if (r->mapping)
+ memcpy(r->mapping, buf, sz);
+ else
+ write(r->fd, buf, sz);
+}
+
+
+int
+rf_writev(struct rfile *r, struct iovec *iov, int iov_count)
+{
+ off_t size = 0;
+ for (int i = 0; i < iov_count; i++)
+ size += iov[i].iov_len;
+
+ if (r->mapping)
+ {
+ /* Update the pointer */
+ off_t target = atomic_fetch_add_explicit(&r->pos, size, memory_order_relaxed) % r->limit;
+
+ /* Write the line */
+ for (int i = 0; i < iov_count; i++)
+ {
+ /* Take care of wrapping; this should really happen only once */
+ off_t rsz;
+ while ((rsz = r->limit - target) < (off_t) iov[i].iov_len)
+ {
+ memcpy(r->mapping + target, iov[i].iov_base, rsz);
+ iov[i].iov_base += rsz;
+ iov[i].iov_len -= rsz;
+ target = 0;
+ }
+
+ memcpy(r->mapping + target, iov[i].iov_base, iov[i].iov_len);
+ target += iov[i].iov_len;
+ }
+ return 1;
+ }
+ else if (r->limit && (atomic_fetch_add_explicit(&r->pos, size, memory_order_relaxed) + size > r->limit))
+ {
+ atomic_fetch_sub_explicit(&r->pos, size, memory_order_relaxed);
+ return 0;
+ }
+ else
+ {
+ while (size > 0)
+ {
+ /* Try to write */
+ ssize_t e = writev(r->fd, iov, iov_count);
+ if (e < 0)
+ if (errno == EINTR)
+ continue;
+ else
+ return 1; /* FIXME: What should we do when we suddenly can't write? */
+
+ /* It is expected that we always write the whole bunch at once */
+ if (e == size)
+ return 1;
+
+ /* Block split should not happen (we write small enough messages)
+ * but if it happens, let's try to write the rest of the log */
+ size -= e;
+ while (e > 0)
+ {
+ if ((ssize_t) iov[0].iov_len > e)
+ {
+ /* Some bytes are remaining in the first chunk */
+ iov[0].iov_len -= e;
+ iov[0].iov_base += e;
+ break;
+ }
+
+ /* First chunk written completely, get rid of it */
+ e -= iov[0].iov_len;
+ iov++;
+ iov_count--;
+ ASSERT_DIE(iov_count > 0);
+ }
+ }
+
+ return 1;
+ }
+}
+
+/*
+ * Dumping to files
+ */
+
+struct dump_request_file {
+ struct dump_request dr;
+ uint pos, max; int fd;
+ uint last_progress_info;
+ char data[0];
+};
+
+static void
+dump_to_file_flush(struct dump_request_file *req)
+{
+ if (req->fd < 0)
+ return;
+
+ for (uint sent = 0; sent < req->pos; )
+ {
+ int e = write(req->fd, &req->data[sent], req->pos - sent);
+ if (e <= 0)
+ {
+ req->dr.report(&req->dr, 8009, "Failed to write data: %m");
+ close(req->fd);
+ req->fd = -1;
+ return;
+ }
+ sent += e;
+ }
+
+ req->dr.size += req->pos;
+ req->pos = 0;
+
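+  /* Report progress at most once per flush, whenever the total dumped size has crossed another power of two */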
+ for (uint reported = 0; req->dr.size >> req->last_progress_info; req->last_progress_info++)
+ if (!reported++)
+ req->dr.report(&req->dr, -13, "... dumped %lu bytes in %t s",
+ req->dr.size, current_time_now() - req->dr.begin);
+}
+
+static void
+dump_to_file_write(struct dump_request *dr, const char *fmt, ...)
+{
+ struct dump_request_file *req = SKIP_BACK(struct dump_request_file, dr, dr);
+
+ for (uint phase = 0; (req->fd >= 0) && (phase < 2); phase++)
+ {
+ va_list args;
+ va_start(args, fmt);
+ int i = bvsnprintf(&req->data[req->pos], req->max - req->pos, fmt, args);
+ va_end(args);
+
+ if (i >= 0)
+ {
+ req->pos += i;
+ return;
+ }
+ else
+ dump_to_file_flush(req);
+ }
+
+ bug("Too long dump call");
+}
+
+struct dump_request *
+dump_to_file_init(off_t offset)
+{
+ ASSERT_DIE(offset + sizeof(struct dump_request_file) + 1024 < (unsigned long) page_size);
+
+ struct dump_request_file *req = alloc_page() + offset;
+ *req = (struct dump_request_file) {
+ .dr = {
+ .write = dump_to_file_write,
+ .begin = current_time_now(),
+ .offset = offset,
+ },
+ .max = page_size - offset - OFFSETOF(struct dump_request_file, data[0]),
+ .fd = -1,
+ };
+
+ return &req->dr;
+}
+
+void
+dump_to_file_run(struct dump_request *dr, const char *file, const char *what, void (*dump)(struct dump_request *))
+{
+ struct dump_request_file *req = SKIP_BACK(struct dump_request_file, dr, dr);
+ req->fd = open(file, O_CREAT | O_WRONLY | O_EXCL, S_IRUSR);
+
+ if (req->fd < 0)
+ {
+ dr->report(dr, 8009, "Failed to open file %s: %m", file);
+ goto cleanup;
+ }
+
+ dr->report(dr, -13, "Dumping %s to %s", what, file);
+
+ dump(dr);
+
+ if (req->fd >= 0)
+ {
+ dump_to_file_flush(req);
+ close(req->fd);
+ }
+
+ btime end = current_time_now();
+ dr->report(dr, 13, "Dumped %lu bytes in %t s", dr->size, end - dr->begin);
+
+cleanup:
+ free_page(((void *) req) - dr->offset);
+}
+
+struct dump_request_cli {
+ cli *cli;
+ struct dump_request dr;
+};
+
+static void
+cmd_dump_report(struct dump_request *dr, int state, const char *fmt, ...)
+{
+ struct dump_request_cli *req = SKIP_BACK(struct dump_request_cli, dr, dr);
+ va_list args;
+ va_start(args, fmt);
+ cli_vprintf(req->cli, state, fmt, args);
+ va_end(args);
+}
+
+void
+cmd_dump_file(struct cli *cli, const char *file, const char *what, void (*dump)(struct dump_request *))
+{
+ if (cli->restricted)
+ return cli_printf(cli, 8007, "Access denied");
+
+ struct dump_request_cli *req = SKIP_BACK(struct dump_request_cli, dr,
+ dump_to_file_init(OFFSETOF(struct dump_request_cli, dr)));
+
+ req->cli = cli;
+ req->dr.report = cmd_dump_report;
+
+ dump_to_file_run(&req->dr, file, what, dump);
+}
--- /dev/null
+/*
+ * CLI: Show threads
+ */
+
+#include "nest/bird.h"
+
+#include "lib/io-loop.h"
+#include "sysdep/unix/io-loop.h"
+#include "nest/cli.h"
+#include "conf/conf.h"
+
+
+struct bird_thread_show_data {
+ struct bird_thread_syncer sync;
+ cli *cli;
+ linpool *lp;
+ u8 show_loops;
+ uint line_pos;
+ uint line_max;
+ const char **lines;
+};
+
+#define tsd_append(...) do { \
+ if (!tsd->lines) \
+ tsd->lines = mb_allocz(tsd->sync.pool, sizeof(const char *) * tsd->line_max); \
+ if (tsd->line_pos >= tsd->line_max) \
+ tsd->lines = mb_realloc(tsd->lines, sizeof (const char *) * (tsd->line_max *= 2)); \
+ tsd->lines[tsd->line_pos++] = lp_sprintf(tsd->lp, __VA_ARGS__); \
+} while (0)
+
+static void
+bird_thread_show_cli_cont(struct cli *c UNUSED)
+{
+ /* Explicitly do nothing to prevent CLI from trying to parse another command. */
+}
+
+static bool
+bird_thread_show_cli_cleanup(struct cli *c UNUSED)
+{
+ /* Defer the cleanup until the writeout is finished. */
+ return false;
+}
+
+static void
+bird_thread_show_spent_time(struct bird_thread_show_data *tsd, const char *name, struct spent_time *st)
+{
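+  /* Print the per-second counters for (at most) the last TIME_BY_SEC_SIZE seconds, newest first; slots with nothing recorded yet show as zero */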
+ char b[TIME_BY_SEC_SIZE * sizeof("1234567890, ")], *bptr = b, *bend = b + sizeof(b);
+ uint cs = CURRENT_SEC;
+ uint fs = NSEC_TO_SEC(st->last_written_ns);
+
+ for (uint i = 0; i <= cs && i < TIME_BY_SEC_SIZE; i++)
+ bptr += bsnprintf(bptr, bend - bptr, "% 10lu ",
+ (cs - i > fs) ? 0 : st->by_sec_ns[(cs - i) % TIME_BY_SEC_SIZE]);
+ bptr[-1] = 0; /* Drop the trailing space */
+
+ tsd_append(" %s total time: % 9t s; last %d secs [ns]: %s", name, st->total_ns NS, MIN(CURRENT_SEC+1, TIME_BY_SEC_SIZE), b);
+}
+
+static void
+bird_thread_show_loop(struct bird_thread_show_data *tsd, struct birdloop *loop)
+{
+ tsd_append(" Loop %s", domain_name(loop->time.domain));
+ bird_thread_show_spent_time(tsd, "Working ", &loop->working);
+ bird_thread_show_spent_time(tsd, "Locking ", &loop->locking);
+}
+
+static void
+bird_thread_show(struct bird_thread_syncer *sync)
+{
+ SKIP_BACK_DECLARE(struct bird_thread_show_data, tsd, sync, sync);
+
+ if (!tsd->lp)
+ tsd->lp = lp_new(tsd->sync.pool);
+
+ if (tsd->show_loops)
+ tsd_append("Thread %04x %s (busy counter %d)", THIS_THREAD_ID, this_thread->busy_active ? " [busy]" : "", this_thread->busy_counter);
+
+ u64 total_time_ns = 0;
+ WALK_TLIST(birdloop, loop, &this_thread->loops)
+ {
+ if (tsd->show_loops)
+ bird_thread_show_loop(tsd, loop);
+
+ total_time_ns += loop->working.total_ns + loop->locking.total_ns;
+ }
+
+ if (tsd->show_loops)
+ {
+ tsd_append(" Total working time: %t", total_time_ns NS);
+ bird_thread_show_spent_time(tsd, "Overhead", &this_thread->overhead);
+ bird_thread_show_spent_time(tsd, "Idle ", &this_thread->idle);
+ }
+ else
+ tsd_append("%04x%s % 9.3t s % 9.3t s % 9.3t s",
+ THIS_THREAD_ID, this_thread->busy_active ? " [busy]" : " ",
+ total_time_ns NS, this_thread->overhead.total_ns NS,
+ (ns_now() - this_thread->meta->last_transition_ns) NS);
+}
+
+static void
+cmd_show_threads_done(struct bird_thread_syncer *sync)
+{
+ SKIP_BACK_DECLARE(struct bird_thread_show_data, tsd, sync, sync);
+ ASSERT_DIE(birdloop_inside(&main_birdloop));
+
+ /* The client lost their patience and dropped the session early. */
+ if (!tsd->cli->sock)
+ {
+    rp_free(tsd->cli->pool);
+    mb_free(tsd);
+ return;
+ }
+
+ tsd->cli->cont = NULL;
+ tsd->cli->cleanup = NULL;
+
+ for (int i=0; i<2; i++)
+ {
+ struct birdloop_pickup_group *group = &pickup_groups[i];
+
+ LOCK_DOMAIN(attrs, group->domain);
+ uint count = 0;
+ u64 total_time_ns = 0;
+ if (!EMPTY_LIST(group->loops))
+ {
+ if (tsd->show_loops)
+ tsd_append("Unassigned loops in group %d:", i);
+
+ struct birdloop *loop;
+ WALK_LIST(loop, group->loops)
+ {
+ if (tsd->show_loops)
+ bird_thread_show_loop(tsd, loop);
+
+ total_time_ns += loop->working.total_ns + loop->locking.total_ns;
+ count++;
+ }
+
+ if (tsd->show_loops)
+ tsd_append(" Total working time: %t", total_time_ns NS);
+ else
+ tsd_append("Unassigned %d loops in group %d, total time %t", count, i, total_time_ns NS);
+ }
+ else
+ tsd_append("All loops in group %d are assigned.", i);
+
+ UNLOCK_DOMAIN(attrs, group->domain);
+ }
+
+ if (!tsd->show_loops)
+ cli_printf(tsd->cli, -1027, "Thread ID Working Overhead Last Pickup/Drop");
+
+ for (uint i = 0; i < tsd->line_pos - 1; i++)
+ cli_printf(tsd->cli, -1027, "%s", tsd->lines[i]);
+
+ cli_printf(tsd->cli, 1027, "%s", tsd->lines[tsd->line_pos-1]);
+ cli_write_trigger(tsd->cli);
+ mb_free(tsd);
+}
+
+void
+cmd_show_threads(int show_loops)
+{
+ struct bird_thread_show_data *tsd = mb_allocz(&root_pool, sizeof(struct bird_thread_show_data));
+ tsd->cli = this_cli;
+ tsd->show_loops = show_loops;
+ tsd->line_pos = 0;
+ tsd->line_max = 64;
+
+ this_cli->cont = bird_thread_show_cli_cont;
+ this_cli->cleanup = bird_thread_show_cli_cleanup;
+
+ bird_thread_sync_all(&tsd->sync, bird_thread_show, cmd_show_threads_done, "Show Threads");
+}
* Can be freely distributed and used under the terms of the GNU GPL.
*/
+#define _GNU_SOURCE
+
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include "lib/io-loop.h"
#include "sysdep/unix/io-loop.h"
#include "conf/conf.h"
-#include "nest/cli.h"
#define THREAD_STACK_SIZE 65536 /* To be lowered in near future */
bug("clock_gettime: %m");
}
-#define NSEC_IN_SEC ((u64) (1000 * 1000 * 1000))
-
u64 ns_now(void)
{
struct timespec ts;
return (u64) (ts.tv_sec - ns_begin.tv_sec) * NSEC_IN_SEC + ts.tv_nsec - ns_begin.tv_nsec;
}
-#define NSEC_TO_SEC(x) ((x) / NSEC_IN_SEC)
-#define CURRENT_SEC NSEC_TO_SEC(ns_now())
-
static _Thread_local struct spent_time *account_target_spent_time;
static _Thread_local u64 *account_target_total;
static _Thread_local u64 account_last;
void
pipe_new(struct pipe *p)
{
+ int flags = O_NONBLOCK | O_CLOEXEC;
+#if HAVE_PIPE2
+ int rv = pipe2(p->fd, flags);
+ if (rv < 0)
+ die("pipe2: %m");
+#else
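+  /* O_CLOEXEC is not a file status flag, so without pipe2() it has to be set via F_SETFD below */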
int rv = pipe(p->fd);
if (rv < 0)
die("pipe: %m");
- if (fcntl(p->fd[0], F_SETFL, O_NONBLOCK) < 0)
+  if ((fcntl(p->fd[0], F_SETFL, flags & ~O_CLOEXEC) < 0) || (fcntl(p->fd[0], F_SETFD, FD_CLOEXEC) < 0))
die("fcntl(O_NONBLOCK): %m");
- if (fcntl(p->fd[1], F_SETFL, O_NONBLOCK) < 0)
+  if ((fcntl(p->fd[1], F_SETFL, flags & ~O_CLOEXEC) < 0) || (fcntl(p->fd[1], F_SETFD, FD_CLOEXEC) < 0))
die("fcntl(O_NONBLOCK): %m");
+#endif
}
void
pipe_free(&loop->wakeup);
}
+static inline void
+wakeup_forked(struct bird_thread *thr)
+{
+ struct pipe new;
+ pipe_new(&new);
+
+  /* This is somewhat sketchy, but there is probably no real
+   * architecture where copying a pair of ints could yield
+   * an invalid in-between value */
+ struct pipe old = thr->wakeup;
+ thr->wakeup = new;
+ synchronize_rcu();
+
+ pipe_free(&old);
+}
+
static inline bool
birdloop_try_ping(struct birdloop *loop, u32 ltt)
{
socket_changed(s);
}
-extern sock *stored_sock; /* mainloop hack */
+sock *stored_sock; /* mainloop hack */
void
birdloop_remove_socket(struct birdloop *loop, sock *s)
sk_pause_rx(struct birdloop *loop, sock *s)
{
ASSERT_DIE(birdloop_inside(loop));
+ ASSERT_DIE(!s->rx_paused);
+ ASSERT_DIE(s->rx_hook);
+ s->rx_paused = s->rx_hook;
s->rx_hook = NULL;
socket_changed(s);
}
void
-sk_resume_rx(struct birdloop *loop, sock *s, int (*hook)(sock *, uint))
+sk_resume_rx(struct birdloop *loop, sock *s)
{
ASSERT_DIE(birdloop_inside(loop));
- ASSERT_DIE(hook);
- s->rx_hook = hook;
+ ASSERT_DIE(s->rx_paused);
+ ASSERT_DIE(!s->rx_hook);
+ s->rx_hook = s->rx_paused;
+ s->rx_paused = NULL;
socket_changed(s);
}
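Since sk_resume_rx() no longer takes the hook from the caller, the pairing is now fully internal: sk_pause_rx() stashes rx_hook into rx_paused and sk_resume_rx() restores it, which is what the BGP uncork path earlier in this patch relies on. A minimal sketch of that pairing; the wrapper function is illustrative only:

/* Sketch only: both calls must run inside the socket's owning loop
 * (birdloop_inside() is asserted by the implementations above). */
static void
rx_pause_resume_sketch(struct birdloop *loop, sock *sk)
{
  sk_pause_rx(loop, sk);     /* rx_hook is saved into rx_paused and RX is switched off */

  /* ... later, once the input can be processed again ... */

  sk_resume_rx(loop, sk);    /* rx_hook is restored from rx_paused */
}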
const struct thread_group_config *cf;
};
+struct birdloop_pickup_group pickup_groups[2] = {
+ {
+ /* all zeroes */
+ .start_threads.hook = bird_thread_start_event,
+ },
+ {
+ /* FIXME: make this dynamic, now it copies the loop_max_latency value from proto/bfd/config.Y */
+ .max_latency = 10 MS,
+ .start_threads.hook = bird_thread_start_event,
+ .start_threads.data = &pickup_groups[1],
+ },
+};
+
typedef union thread_group_public {
struct { THREAD_GROUP_PUBLIC; };
struct thread_group_private priv;
thread_group_shutdown = {};
-static _Thread_local struct bird_thread *this_thread;
+_Thread_local struct bird_thread *this_thread;
static void bird_thread_busy_set(struct thread_group_private *, int val);
int timeout;
/* Schedule all loops with timed out timers */
- timers_fire(&thr->meta->time, 0);
+ timers_fire(&thr->meta->time);
/* Pickup new loops */
birdloop_balancer();
if (new->shutdown)
return;
+ if (!new->thread_count)
+ new->thread_count = 1;
+
/* First, we match the new config to the existing groups */
WALK_TLIST(thread_group_config, tgc, &new->thread_group)
{
sockets_fire(loop, 0, 1);
/* Run timers */
- timers_fire(&loop->time, 0);
+ timers_fire(&loop->time);
/* Run events */
repeat = ev_run_list(&loop->event_list);
else
ev_send(&this_birdloop->defer_list, e);
}
+
+/*
+ * Minimalist mainloop with no sockets
+ */
+
+void
+birdloop_minimalist_main(void)
+{
+ /* In case we got forked (hack for Flock) */
+ wakeup_forked(&main_thread);
+
+ while (1)
+ {
+ /* Unset ping information */
+ atomic_fetch_and_explicit(&main_birdloop.thread_transition, ~LTT_PING, memory_order_acq_rel);
+
+ times_update();
+ ev_run_list(&global_event_list);
+ ev_run_list(&global_work_list);
+ ev_run_list(&main_birdloop.event_list);
+ timers_fire(&main_birdloop.time);
+
+ bool events =
+ !ev_list_empty(&global_event_list) ||
+ !ev_list_empty(&global_work_list) ||
+ !ev_list_empty(&main_birdloop.event_list);
+
+ int poll_tout = (events ? 0 : 3000); /* Time in milliseconds */
+ timer *t;
+ if (t = timers_first(&main_birdloop.time))
+ {
+ times_update();
+ int timeout = (tm_remains(t) TO_MS) + 1;
+ poll_tout = MIN(poll_tout, timeout);
+ }
+
+ struct pollfd pfd = {
+ .fd = main_birdloop.thread->wakeup.fd[0],
+ .events = POLLIN,
+ };
+
+ int rv = poll(&pfd, 1, poll_tout);
+ if ((rv < 0) && (errno != EINTR) && (errno != EAGAIN))
+ bug("poll in main birdloop: %m");
+
+ /* Drain wakeup fd */
+ if (pfd.revents & POLLIN)
+ {
+ THREAD_TRACE(DL_WAKEUP, "Ping received");
+ ASSERT_DIE(rv == 1);
+ wakeup_drain(main_birdloop.thread);
+ }
+ }
+}
};
#include "lib/tlists.h"
+extern _Thread_local struct bird_thread *this_thread;
struct bird_thread_syncer {
pool *pool;
void (*hook)(struct bird_thread_syncer *),
void (*done)(struct bird_thread_syncer *), const char *name);
+struct birdloop_pickup_group {
+ DOMAIN(attrs) domain;
+ list loops;
+ list threads;
+ uint thread_count;
+ uint thread_busy_count;
+ uint loop_count;
+ uint loop_unassigned_count;
+ btime max_latency;
+ event start_threads;
+};
+
+extern struct birdloop_pickup_group pickup_groups[2];
+
#endif
#include "sysdep/unix/unix.h"
#include "sysdep/unix/io-loop.h"
-#include CONFIG_INCLUDE_SYSIO_H
/* Maximum number of calls of tx handler for one socket in one
* poll iteration. Should be small enough to not monopolize CPU by
this to gen small latencies */
#define MAX_RX_STEPS 4
-
-/*
- * Tracked Files
- */
-
-struct rfile {
- resource r;
- struct stat stat;
- int fd;
- off_t limit;
- _Atomic off_t pos;
- void *mapping;
-};
-
-struct rfile rf_stderr = {
- .fd = 2,
-};
-
-static void
-rf_free(resource *r)
-{
- struct rfile *a = (struct rfile *) r;
-
- if (a->mapping)
- munmap(a->mapping, a->limit);
-
- close(a->fd);
-}
-
-static void
-rf_dump(struct dump_request *dreq, resource *r)
-{
- struct rfile *a = (struct rfile *) r;
-
- RDUMP("(fd %d)\n", a->fd);
-}
-
-static struct resclass rf_class = {
- "FILE",
- sizeof(struct rfile),
- rf_free,
- rf_dump,
- NULL,
- NULL
-};
-
-int
-rf_fileno(struct rfile *f)
-{
- return f->fd;
-}
-
-static int
-rf_open_get_fd(const char *name, enum rf_mode mode)
-{
- int omode = S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH;
- int flags;
-
- switch (mode)
- {
- case RF_APPEND:
- flags = O_WRONLY | O_CREAT | O_APPEND;
- break;
-
- case RF_FIXED:
- flags = O_RDWR | O_CREAT;
- break;
-
- default:
- bug("rf_open() must have the mode set");
- }
-
- return open(name, flags, omode);
-}
-
-static void
-rf_stat(struct rfile *r)
-{
- if (fstat(r->fd, &r->stat) < 0)
- die("fstat() failed: %m");
-}
-
-struct rfile *
-rf_open(pool *p, const char *name, enum rf_mode mode, off_t limit)
-{
- int fd = rf_open_get_fd(name, mode);
- if (fd < 0)
- return NULL; /* The caller takes care of printing %m. */
-
- struct rfile *r = ralloc(p, &rf_class);
- r->fd = fd;
- r->limit = limit;
-
- switch (mode)
- {
- case RF_APPEND:
- rf_stat(r);
- atomic_store_explicit(&r->pos, S_ISREG(r->stat.st_mode) ? r->stat.st_size : 0, memory_order_relaxed);
- break;
-
- case RF_FIXED:
- if ((ftruncate(fd, limit) < 0)
- || ((r->mapping = mmap(NULL, limit, PROT_WRITE, MAP_SHARED, fd, 0)) == MAP_FAILED))
- {
- int erf = errno;
- r->mapping = NULL;
- rfree(r);
- errno = erf;
- return NULL;
- }
- break;
-
- default:
- bug("rf_open() must have the mode set");
- }
-
-
- return r;
-}
-
-off_t
-rf_size(struct rfile *r)
-{
- return atomic_load_explicit(&r->pos, memory_order_relaxed);
-}
-
-int
-rf_same(struct rfile *a, struct rfile *b)
-{
- rf_stat(a);
- rf_stat(b);
-
- return
- (a->limit == b->limit) &&
- (a->stat.st_mode == b->stat.st_mode) &&
- (a->stat.st_dev == b->stat.st_dev) &&
- (a->stat.st_ino == b->stat.st_ino);
-}
-
-void
-rf_write_crude(struct rfile *r, const char *buf, int sz)
-{
- if (r->mapping)
- memcpy(r->mapping, buf, sz);
- else
- write(r->fd, buf, sz);
-}
-
-
-int
-rf_writev(struct rfile *r, struct iovec *iov, int iov_count)
-{
- off_t size = 0;
- for (int i = 0; i < iov_count; i++)
- size += iov[i].iov_len;
-
- if (r->mapping)
- {
- /* Update the pointer */
- off_t target = atomic_fetch_add_explicit(&r->pos, size, memory_order_relaxed) % r->limit;
-
- /* Write the line */
- for (int i = 0; i < iov_count; i++)
- {
- /* Take care of wrapping; this should really happen only once */
- off_t rsz;
- while ((rsz = r->limit - target) < (off_t) iov[i].iov_len)
- {
- memcpy(r->mapping + target, iov[i].iov_base, rsz);
- iov[i].iov_base += rsz;
- iov[i].iov_len -= rsz;
- target = 0;
- }
-
- memcpy(r->mapping + target, iov[i].iov_base, iov[i].iov_len);
- target += iov[i].iov_len;
- }
- return 1;
- }
- else if (r->limit && (atomic_fetch_add_explicit(&r->pos, size, memory_order_relaxed) + size > r->limit))
- {
- atomic_fetch_sub_explicit(&r->pos, size, memory_order_relaxed);
- return 0;
- }
- else
- {
- while (size > 0)
- {
- /* Try to write */
- ssize_t e = writev(r->fd, iov, iov_count);
- if (e < 0)
- if (errno == EINTR)
- continue;
- else
- return 1; /* FIXME: What should we do when we suddenly can't write? */
-
- /* It is expected that we always write the whole bunch at once */
- if (e == size)
- return 1;
-
- /* Block split should not happen (we write small enough messages)
- * but if it happens, let's try to write the rest of the log */
- size -= e;
- while (e > 0)
- {
- if ((ssize_t) iov[0].iov_len > e)
- {
- /* Some bytes are remaining in the first chunk */
- iov[0].iov_len -= e;
- iov[0].iov_base += e;
- break;
- }
-
- /* First chunk written completely, get rid of it */
- e -= iov[0].iov_len;
- iov++;
- iov_count--;
- ASSERT_DIE(iov_count > 0);
- }
- }
-
- return 1;
- }
-}
-
-/*
- * Dumping to files
- */
-
-struct dump_request_file {
- struct dump_request dr;
- uint pos, max; int fd;
- uint last_progress_info;
- char data[0];
-};
-
-static void
-dump_to_file_flush(struct dump_request_file *req)
-{
- if (req->fd < 0)
- return;
-
- for (uint sent = 0; sent < req->pos; )
- {
- int e = write(req->fd, &req->data[sent], req->pos - sent);
- if (e <= 0)
- {
- req->dr.report(&req->dr, 8009, "Failed to write data: %m");
- close(req->fd);
- req->fd = -1;
- return;
- }
- sent += e;
- }
-
- req->dr.size += req->pos;
- req->pos = 0;
-
- for (uint reported = 0; req->dr.size >> req->last_progress_info; req->last_progress_info++)
- if (!reported++)
- req->dr.report(&req->dr, -13, "... dumped %lu bytes in %t s",
- req->dr.size, current_time_now() - req->dr.begin);
-}
-
-static void
-dump_to_file_write(struct dump_request *dr, const char *fmt, ...)
-{
- struct dump_request_file *req = SKIP_BACK(struct dump_request_file, dr, dr);
-
- for (uint phase = 0; (req->fd >= 0) && (phase < 2); phase++)
- {
- va_list args;
- va_start(args, fmt);
- int i = bvsnprintf(&req->data[req->pos], req->max - req->pos, fmt, args);
- va_end(args);
-
- if (i >= 0)
- {
- req->pos += i;
- return;
- }
- else
- dump_to_file_flush(req);
- }
-
- bug("Too long dump call");
-}
-
-struct dump_request *
-dump_to_file_init(off_t offset)
-{
- ASSERT_DIE(offset + sizeof(struct dump_request_file) + 1024 < (unsigned long) page_size);
-
- struct dump_request_file *req = alloc_page() + offset;
- *req = (struct dump_request_file) {
- .dr = {
- .write = dump_to_file_write,
- .begin = current_time_now(),
- .offset = offset,
- },
- .max = page_size - offset - OFFSETOF(struct dump_request_file, data[0]),
- .fd = -1,
- };
-
- return &req->dr;
-}
-
-void
-dump_to_file_run(struct dump_request *dr, const char *file, const char *what, void (*dump)(struct dump_request *))
-{
- struct dump_request_file *req = SKIP_BACK(struct dump_request_file, dr, dr);
- req->fd = open(file, O_CREAT | O_WRONLY | O_EXCL, S_IRUSR);
-
- if (req->fd < 0)
- {
- dr->report(dr, 8009, "Failed to open file %s: %m", file);
- goto cleanup;
- }
-
- dr->report(dr, -13, "Dumping %s to %s", what, file);
-
- dump(dr);
-
- if (req->fd >= 0)
- {
- dump_to_file_flush(req);
- close(req->fd);
- }
-
- btime end = current_time_now();
- dr->report(dr, 13, "Dumped %lu bytes in %t s", dr->size, end - dr->begin);
-
-cleanup:
- free_page(((void *) req) - dr->offset);
-}
-
-struct dump_request_cli {
- cli *cli;
- struct dump_request dr;
-};
-
-static void
-cmd_dump_report(struct dump_request *dr, int state, const char *fmt, ...)
-{
- struct dump_request_cli *req = SKIP_BACK(struct dump_request_cli, dr, dr);
- va_list args;
- va_start(args, fmt);
- cli_vprintf(req->cli, state, fmt, args);
- va_end(args);
-}
-
-void
-cmd_dump_file(struct cli *cli, const char *file, const char *what, void (*dump)(struct dump_request *))
-{
- if (cli->restricted)
- return cli_printf(cli, 8007, "Access denied");
-
- struct dump_request_cli *req = SKIP_BACK(struct dump_request_cli, dr,
- dump_to_file_init(OFFSETOF(struct dump_request_cli, dr)));
-
- req->cli = cli;
- req->dr.report = cmd_dump_report;
-
- dump_to_file_run(&req->dr, file, what, dump);
-}
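/*
 * Illustrative sketch, not part of the patch: how a caller might drive the
 * dump-to-file API above. The example_* names and the output path are
 * hypothetical; dump_to_file_init(), dump_to_file_run() and the dr->write()
 * / dr->report() hooks are the ones defined in this file.
 */
static void
example_dump_report(struct dump_request *dr UNUSED, int state UNUSED, const char *fmt, ...)
{
  char buf[256];
  va_list args;
  va_start(args, fmt);
  bvsnprintf(buf, sizeof(buf), fmt, args);
  va_end(args);
  log(L_INFO "%s", buf);
}

static void
example_dump_contents(struct dump_request *dr)
{
  /* All output goes through the buffered writer; long dumps are flushed in chunks */
  dr->write(dr, "example dump: %u items\n", 42);
}

static void
example_dump_to_file(void)
{
  struct dump_request *dr = dump_to_file_init(0);
  dr->report = example_dump_report;
  dump_to_file_run(dr, "/tmp/bird-example.dump", "example state", example_dump_contents);
}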
-
-
-/*
- * Time clock
- */
-
-btime boot_time;
-
-
-void
-times_update(void)
-{
- struct timespec ts;
- int rv;
-
- btime old_time = current_time();
- btime old_real_time = current_real_time();
-
- rv = clock_gettime(CLOCK_MONOTONIC, &ts);
- if (rv < 0)
- die("Monotonic clock is missing");
-
- if ((ts.tv_sec < 0) || (((u64) ts.tv_sec) > ((u64) 1 << 40)))
- log(L_WARN "Monotonic clock is crazy");
-
- btime new_time = ts.tv_sec S + ts.tv_nsec NS;
-
- if (new_time < old_time)
- log(L_ERR "Monotonic clock is broken");
-
- rv = clock_gettime(CLOCK_REALTIME, &ts);
- if (rv < 0)
- die("clock_gettime: %m");
-
- btime new_real_time = ts.tv_sec S + ts.tv_nsec NS;
-
- if (!atomic_compare_exchange_strong_explicit(
- &last_time,
- &old_time,
- new_time,
- memory_order_acq_rel,
- memory_order_relaxed))
- DBG("Time update collision: last_time");
-
- if (!atomic_compare_exchange_strong_explicit(
- &real_time,
- &old_real_time,
- new_real_time,
- memory_order_acq_rel,
- memory_order_relaxed))
- DBG("Time update collision: real_time");
-}
-
-btime
-current_time_now(void)
-{
- struct timespec ts;
- int rv;
-
- rv = clock_gettime(CLOCK_MONOTONIC, &ts);
- if (rv < 0)
- die("clock_gettime: %m");
-
- return ts.tv_sec S + ts.tv_nsec NS;
-}
-
-/**
- * DOC: Sockets
- *
- * Socket resources represent network connections. Their data structure (&socket)
- * contains a lot of fields defining the exact type of the socket, the local and
- * remote addresses and ports, pointers to socket buffers and finally pointers to
- * hook functions to be called when new data have arrived to the receive buffer
- * (@rx_hook), when the contents of the transmit buffer have been transmitted
- * (@tx_hook) and when an error or connection close occurs (@err_hook).
- *
- * Freeing of sockets from inside socket hooks is perfectly safe.
- */
-
-#ifndef SOL_IP
-#define SOL_IP IPPROTO_IP
-#endif
-
-#ifndef SOL_IPV6
-#define SOL_IPV6 IPPROTO_IPV6
-#endif
-
-#ifndef SOL_ICMPV6
-#define SOL_ICMPV6 IPPROTO_ICMPV6
-#endif
-
-
-/*
- * Sockaddr helper functions
- */
-
-static inline int UNUSED sockaddr_length(int af)
-{ return (af == AF_INET) ? sizeof(struct sockaddr_in) : sizeof(struct sockaddr_in6); }
-
-static inline void
-sockaddr_fill4(struct sockaddr_in *sa, ip_addr a, uint port)
-{
- memset(sa, 0, sizeof(struct sockaddr_in));
-#ifdef HAVE_STRUCT_SOCKADDR_SA_LEN
- sa->sin_len = sizeof(struct sockaddr_in);
-#endif
- sa->sin_family = AF_INET;
- sa->sin_port = htons(port);
- sa->sin_addr = ipa_to_in4(a);
-}
-
-static inline void
-sockaddr_fill6(struct sockaddr_in6 *sa, ip_addr a, struct iface *ifa, uint port)
-{
- memset(sa, 0, sizeof(struct sockaddr_in6));
-#ifdef SIN6_LEN
- sa->sin6_len = sizeof(struct sockaddr_in6);
-#endif
- sa->sin6_family = AF_INET6;
- sa->sin6_port = htons(port);
- sa->sin6_flowinfo = 0;
- sa->sin6_addr = ipa_to_in6(a);
-
- if (ifa && ipa_is_link_local(a))
- sa->sin6_scope_id = ifa->index;
-}
-
-void
-sockaddr_fill(sockaddr *sa, int af, ip_addr a, struct iface *ifa, uint port)
-{
- if (af == AF_INET)
- sockaddr_fill4((struct sockaddr_in *) sa, a, port);
- else if (af == AF_INET6)
- sockaddr_fill6((struct sockaddr_in6 *) sa, a, ifa, port);
- else
- bug("Unknown AF");
-}
-
-static inline void
-sockaddr_read4(struct sockaddr_in *sa, ip_addr *a, uint *port)
-{
- *port = ntohs(sa->sin_port);
- *a = ipa_from_in4(sa->sin_addr);
-}
-
-static inline void
-sockaddr_read6(struct sockaddr_in6 *sa, ip_addr *a, struct iface **ifa, uint *port)
-{
- *port = ntohs(sa->sin6_port);
- *a = ipa_from_in6(sa->sin6_addr);
-
- if (ifa && ipa_is_link_local(*a))
- *ifa = if_find_by_index(sa->sin6_scope_id);
-}
-
-int
-sockaddr_read(sockaddr *sa, int af, ip_addr *a, struct iface **ifa, uint *port)
-{
- if (sa->sa.sa_family != af)
- goto fail;
-
- if (af == AF_INET)
- sockaddr_read4((struct sockaddr_in *) sa, a, port);
- else if (af == AF_INET6)
- sockaddr_read6((struct sockaddr_in6 *) sa, a, ifa, port);
- else
- goto fail;
-
- return 0;
-
- fail:
- *a = IPA_NONE;
- *port = 0;
- return -1;
-}
-
-
-/*
- * IPv6 multicast syscalls
- */
-
-/* Fortunately standardized in RFC 3493 */
-
-#define INIT_MREQ6(maddr,ifa) \
- { .ipv6mr_multiaddr = ipa_to_in6(maddr), .ipv6mr_interface = ifa->index }
-
-static inline int
-sk_setup_multicast6(sock *s)
-{
- int index = s->iface->index;
- int ttl = s->ttl;
- int n = 0;
-
- if (setsockopt(s->fd, SOL_IPV6, IPV6_MULTICAST_IF, &index, sizeof(index)) < 0)
- ERR("IPV6_MULTICAST_IF");
-
- if (setsockopt(s->fd, SOL_IPV6, IPV6_MULTICAST_HOPS, &ttl, sizeof(ttl)) < 0)
- ERR("IPV6_MULTICAST_HOPS");
-
- if (setsockopt(s->fd, SOL_IPV6, IPV6_MULTICAST_LOOP, &n, sizeof(n)) < 0)
- ERR("IPV6_MULTICAST_LOOP");
-
- return 0;
-}
-
-static inline int
-sk_join_group6(sock *s, ip_addr maddr)
-{
- struct ipv6_mreq mr = INIT_MREQ6(maddr, s->iface);
-
- if (setsockopt(s->fd, SOL_IPV6, IPV6_JOIN_GROUP, &mr, sizeof(mr)) < 0)
- ERR("IPV6_JOIN_GROUP");
-
- return 0;
-}
-
-static inline int
-sk_leave_group6(sock *s, ip_addr maddr)
-{
- struct ipv6_mreq mr = INIT_MREQ6(maddr, s->iface);
-
- if (setsockopt(s->fd, SOL_IPV6, IPV6_LEAVE_GROUP, &mr, sizeof(mr)) < 0)
- ERR("IPV6_LEAVE_GROUP");
-
- return 0;
-}
-
-
-/*
- * IPv6 packet control messages
- */
-
-/* Also standardized, in RFC 3542 */
-
-/*
- * RFC 2292 uses IPV6_PKTINFO for both the socket option and the cmsg
- * type, RFC 3542 changed the socket option to IPV6_RECVPKTINFO. If we
- * don't have IPV6_RECVPKTINFO we suppose the OS implements the older
- * RFC and we use IPV6_PKTINFO.
- */
-#ifndef IPV6_RECVPKTINFO
-#define IPV6_RECVPKTINFO IPV6_PKTINFO
-#endif
-/*
- * Same goes for IPV6_HOPLIMIT -> IPV6_RECVHOPLIMIT.
- */
-#ifndef IPV6_RECVHOPLIMIT
-#define IPV6_RECVHOPLIMIT IPV6_HOPLIMIT
-#endif
-
-
-#define CMSG6_SPACE_PKTINFO CMSG_SPACE(sizeof(struct in6_pktinfo))
-#define CMSG6_SPACE_TTL CMSG_SPACE(sizeof(int))
-
-static inline int
-sk_request_cmsg6_pktinfo(sock *s)
-{
- int y = 1;
-
- if (setsockopt(s->fd, SOL_IPV6, IPV6_RECVPKTINFO, &y, sizeof(y)) < 0)
- ERR("IPV6_RECVPKTINFO");
-
- return 0;
-}
-
-static inline int
-sk_request_cmsg6_ttl(sock *s)
-{
- int y = 1;
-
- if (setsockopt(s->fd, SOL_IPV6, IPV6_RECVHOPLIMIT, &y, sizeof(y)) < 0)
- ERR("IPV6_RECVHOPLIMIT");
-
- return 0;
-}
-
-static inline void
-sk_process_cmsg6_pktinfo(sock *s, struct cmsghdr *cm)
-{
- if (cm->cmsg_type == IPV6_PKTINFO)
- {
- struct in6_pktinfo *pi = (struct in6_pktinfo *) CMSG_DATA(cm);
- s->laddr = ipa_from_in6(pi->ipi6_addr);
- s->lifindex = pi->ipi6_ifindex;
- }
-}
-
-static inline void
-sk_process_cmsg6_ttl(sock *s, struct cmsghdr *cm)
-{
- if (cm->cmsg_type == IPV6_HOPLIMIT)
- s->rcv_ttl = * (int *) CMSG_DATA(cm);
-}
-
-static inline void
-sk_prepare_cmsgs6(sock *s, struct msghdr *msg, void *cbuf, size_t cbuflen)
-{
- struct cmsghdr *cm;
- struct in6_pktinfo *pi;
- int controllen = 0;
-
- msg->msg_control = cbuf;
- msg->msg_controllen = cbuflen;
-
- cm = CMSG_FIRSTHDR(msg);
- cm->cmsg_level = SOL_IPV6;
- cm->cmsg_type = IPV6_PKTINFO;
- cm->cmsg_len = CMSG_LEN(sizeof(*pi));
- controllen += CMSG_SPACE(sizeof(*pi));
-
- pi = (struct in6_pktinfo *) CMSG_DATA(cm);
- pi->ipi6_ifindex = s->iface ? s->iface->index : 0;
- pi->ipi6_addr = ipa_to_in6(s->saddr);
-
- msg->msg_controllen = controllen;
-}
-
-
-/*
- * Miscellaneous socket syscalls
- */
-
-static inline int
-sk_set_ttl4(sock *s, int ttl)
-{
- if (setsockopt(s->fd, SOL_IP, IP_TTL, &ttl, sizeof(ttl)) < 0)
- ERR("IP_TTL");
-
- return 0;
-}
-
-static inline int
-sk_set_ttl6(sock *s, int ttl)
-{
- if (setsockopt(s->fd, SOL_IPV6, IPV6_UNICAST_HOPS, &ttl, sizeof(ttl)) < 0)
- ERR("IPV6_UNICAST_HOPS");
-
- return 0;
-}
-
-static inline int
-sk_set_tos4(sock *s, int tos)
-{
- if (setsockopt(s->fd, SOL_IP, IP_TOS, &tos, sizeof(tos)) < 0)
- ERR("IP_TOS");
-
- return 0;
-}
-
-static inline int
-sk_set_tos6(sock *s, int tos)
-{
- if (setsockopt(s->fd, SOL_IPV6, IPV6_TCLASS, &tos, sizeof(tos)) < 0)
- ERR("IPV6_TCLASS");
-
- return 0;
-}
-
-static inline int
-sk_set_high_port(sock *s UNUSED)
-{
- /* Port range setting is optional, ignore it if not supported */
-
-#ifdef IP_PORTRANGE
- if (sk_is_ipv4(s))
- {
- int range = IP_PORTRANGE_HIGH;
- if (setsockopt(s->fd, SOL_IP, IP_PORTRANGE, &range, sizeof(range)) < 0)
- ERR("IP_PORTRANGE");
- }
-#endif
-
-#ifdef IPV6_PORTRANGE
- if (sk_is_ipv6(s))
- {
- int range = IPV6_PORTRANGE_HIGH;
- if (setsockopt(s->fd, SOL_IPV6, IPV6_PORTRANGE, &range, sizeof(range)) < 0)
- ERR("IPV6_PORTRANGE");
- }
-#endif
-
- return 0;
-}
-
-static inline int
-sk_set_min_rcvbuf_(sock *s, int bufsize)
-{
- int oldsize = 0, oldsize_s = sizeof(oldsize);
-
- if (getsockopt(s->fd, SOL_SOCKET, SO_RCVBUF, &oldsize, &oldsize_s) < 0)
- ERR("SO_RCVBUF");
-
- if (oldsize >= bufsize)
- return 0;
-
- bufsize = BIRD_ALIGN(bufsize, 64);
- if (setsockopt(s->fd, SOL_SOCKET, SO_RCVBUF, &bufsize, sizeof(bufsize)) < 0)
- ERR("SO_RCVBUF");
-
- /*
- int newsize = 0, newsize_s = sizeof(newsize);
- if (getsockopt(s->fd, SOL_SOCKET, SO_RCVBUF, &newsize, &newsize_s) < 0)
- ERR("SO_RCVBUF");
-
- log(L_INFO "Setting rcvbuf on %s from %d to %d",
- s->iface ? s->iface->name : "*", oldsize, newsize);
- */
-
- return 0;
-}
-
-static void
-sk_set_min_rcvbuf(sock *s, int bufsize)
-{
- if (sk_set_min_rcvbuf_(s, bufsize) < 0)
- log(L_WARN "Socket error: %s%#m", s->err);
-}
-
-static inline byte *
-sk_skip_ip_header(byte *pkt, int *len)
-{
- if ((*len < 20) || ((*pkt & 0xf0) != 0x40))
- return NULL;
-
- int hlen = (*pkt & 0x0f) * 4;
- if ((hlen < 20) || (hlen > *len))
- return NULL;
-
- *len -= hlen;
- return pkt + hlen;
-}
-
-byte *
-sk_rx_buffer(sock *s, int *len)
-{
- if (sk_is_ipv4(s) && (s->type == SK_IP))
- return sk_skip_ip_header(s->rbuf, len);
- else
- return s->rbuf;
-}
-
-
-/*
- * Public socket functions
- */
-
-/**
- * sk_setup_multicast - enable multicast for given socket
- * @s: socket
- *
- * Prepare transmission of multicast packets for given datagram socket.
- * The socket must have defined @iface.
- *
- * Result: 0 for success, -1 for an error.
- */
-
-int
-sk_setup_multicast(sock *s)
-{
- ASSERT(s->iface);
-
- if (sk_is_ipv4(s))
- return sk_setup_multicast4(s);
- else
- return sk_setup_multicast6(s);
-}
-
-/**
- * sk_join_group - join multicast group for given socket
- * @s: socket
- * @maddr: multicast address
- *
- * Join multicast group for given datagram socket and associated interface.
- * The socket must have defined @iface.
- *
- * Result: 0 for success, -1 for an error.
- */
-
-int
-sk_join_group(sock *s, ip_addr maddr)
-{
- if (sk_is_ipv4(s))
- return sk_join_group4(s, maddr);
- else
- return sk_join_group6(s, maddr);
-}
-
-/**
- * sk_leave_group - leave multicast group for given socket
- * @s: socket
- * @maddr: multicast address
- *
- * Leave multicast group for given datagram socket and associated interface.
- * The socket must have defined @iface.
- *
- * Result: 0 for success, -1 for an error.
- */
-
-int
-sk_leave_group(sock *s, ip_addr maddr)
-{
- if (sk_is_ipv4(s))
- return sk_leave_group4(s, maddr);
- else
- return sk_leave_group6(s, maddr);
-}
-
-/**
- * sk_setup_broadcast - enable broadcast for given socket
- * @s: socket
- *
- * Allow reception and transmission of broadcast packets for given datagram
- * socket. The socket must have defined @iface. For transmission, packets should
- * be sent to @brd address of @iface.
- *
- * Result: 0 for success, -1 for an error.
- */
-
-int
-sk_setup_broadcast(sock *s)
-{
- int y = 1;
-
- if (setsockopt(s->fd, SOL_SOCKET, SO_BROADCAST, &y, sizeof(y)) < 0)
- ERR("SO_BROADCAST");
-
- return 0;
-}
-
-/**
- * sk_set_ttl - set transmit TTL for given socket
- * @s: socket
- * @ttl: TTL value
- *
- * Set TTL for already opened connections when TTL was not set before. Useful
- * for accepted connections when different ones should have different TTL.
- *
- * Result: 0 for success, -1 for an error.
- */
-
-int
-sk_set_ttl(sock *s, int ttl)
-{
- s->ttl = ttl;
-
- if (sk_is_ipv4(s))
- return sk_set_ttl4(s, ttl);
- else
- return sk_set_ttl6(s, ttl);
-}
-
-/**
- * sk_set_min_ttl - set minimal accepted TTL for given socket
- * @s: socket
- * @ttl: TTL value
- *
- * Set minimal accepted TTL for given socket. Can be used for TTL security
- * implementations.
- *
- * Result: 0 for success, -1 for an error.
- */
-
-int
-sk_set_min_ttl(sock *s, int ttl)
-{
- if (sk_is_ipv4(s))
- return sk_set_min_ttl4(s, ttl);
- else
- return sk_set_min_ttl6(s, ttl);
-}
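/*
 * Illustrative sketch, not part of the patch: a GTSM-style (RFC 5082) setup
 * using the two calls above. A directly connected peer sends with TTL 255
 * and we accept only packets that still carry TTL 255.
 */
static int
example_enable_gtsm(sock *s)
{
  if (sk_set_ttl(s, 255) < 0)
    return -1;

  return sk_set_min_ttl(s, 255);
}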
-
-#if 0
-/**
- * sk_set_md5_auth - add / remove MD5 security association for given socket
- * @s: socket
- * @local: IP address of local side
- * @remote: IP address of remote side
- * @ifa: Interface for link-local IP address
- * @passwd: Password used for MD5 authentication
- * @setkey: Update also system SA/SP database
- *
- * In TCP MD5 handling code in kernel, there is a set of security associations
- * used for choosing password and other authentication parameters according to
- * the local and remote address. This function is useful for listening sockets;
- * for active sockets it may be enough to set the s->password field.
- *
- * When called with passwd != NULL, the new pair is added;
- * When called with passwd == NULL, the existing pair is removed.
- *
- * Note that while in Linux, the MD5 SAs are specific to socket, in BSD they are
- * stored in global SA/SP database (but the behavior also must be enabled on
- * per-socket basis). In case of multiple sockets to the same neighbor, the
- * socket-specific state must be configured for each socket while global state
- * just once per src-dst pair. The @setkey argument controls whether the global
- * state (SA/SP database) is also updated.
- *
- * Result: 0 for success, -1 for an error.
- */
-
-int
-sk_set_md5_auth(sock *s, ip_addr local, ip_addr remote, struct iface *ifa, char *passwd, int setkey)
-{ DUMMY; }
-#endif
-
-/**
- * sk_set_ipv6_checksum - specify IPv6 checksum offset for given socket
- * @s: socket
- * @offset: offset
- *
- * Specify IPv6 checksum field offset for given raw IPv6 socket. After that, the
- * kernel will automatically fill it for outgoing packets and check it for
- * incoming packets. Should not be used on ICMPv6 sockets, where the position is
- * known to the kernel.
- *
- * Result: 0 for success, -1 for an error.
- */
-
-int
-sk_set_ipv6_checksum(sock *s, int offset)
-{
- if (setsockopt(s->fd, SOL_IPV6, IPV6_CHECKSUM, &offset, sizeof(offset)) < 0)
- ERR("IPV6_CHECKSUM");
-
- return 0;
-}
-
-int
-sk_set_icmp6_filter(sock *s, int p1, int p2)
-{
- /* a bit of lame interface, but it is here only for Radv */
- struct icmp6_filter f;
-
- ICMP6_FILTER_SETBLOCKALL(&f);
- ICMP6_FILTER_SETPASS(p1, &f);
- ICMP6_FILTER_SETPASS(p2, &f);
-
- if (setsockopt(s->fd, SOL_ICMPV6, ICMP6_FILTER, &f, sizeof(f)) < 0)
- ERR("ICMP6_FILTER");
-
- return 0;
-}
-
-void
-sk_log_error(sock *s, const char *p)
-{
- log(L_ERR "%s: Socket error: %s%#m", p, s->err);
-}
-
-
-/*
- * Actual struct birdsock code
- */
-
-sock *
-sk_next(sock *s)
-{
- if (!s->n.next->next)
- return NULL;
- else
- return SKIP_BACK(sock, n, s->n.next);
-}
-
-static void
-sk_alloc_bufs(sock *s)
-{
- if (!s->rbuf && s->rbsize)
- s->rbuf = s->rbuf_alloc = xmalloc(s->rbsize);
- s->rpos = s->rbuf;
- if (!s->tbuf && s->tbsize)
- s->tbuf = s->tbuf_alloc = xmalloc(s->tbsize);
- s->tpos = s->ttx = s->tbuf;
-}
-
-static void
-sk_free_bufs(sock *s)
-{
- if (s->rbuf_alloc)
- {
- xfree(s->rbuf_alloc);
- s->rbuf = s->rbuf_alloc = NULL;
- }
- if (s->tbuf_alloc)
- {
- xfree(s->tbuf_alloc);
- s->tbuf = s->tbuf_alloc = NULL;
- }
-}
-
-#ifdef HAVE_LIBSSH
-static void
-sk_ssh_free(sock *s)
-{
- struct ssh_sock *ssh = s->ssh;
-
- if (s->ssh == NULL)
- return;
-
- s->ssh = NULL;
-
- if (ssh->channel)
- {
- ssh_channel_close(ssh->channel);
- ssh_channel_free(ssh->channel);
- ssh->channel = NULL;
- }
-
- if (ssh->session)
- {
- ssh_disconnect(ssh->session);
- ssh_free(ssh->session);
- ssh->session = NULL;
- }
-}
-#endif
-
-
-static void
-sk_free(resource *r)
-{
- sock *s = (sock *) r;
-
- sk_free_bufs(s);
-
-#ifdef HAVE_LIBSSH
- if (s->type == SK_SSH || s->type == SK_SSH_ACTIVE)
- sk_ssh_free(s);
-#endif
-
- if (s->loop)
- birdloop_remove_socket(s->loop, s);
-
- if (s->fd >= 0 && s->type != SK_SSH && s->type != SK_SSH_ACTIVE)
- close(s->fd);
-
- s->fd = -1;
-}
-
-void
-sk_set_rbsize(sock *s, uint val)
-{
- ASSERT(s->rbuf_alloc == s->rbuf);
-
- if (s->rbsize == val)
- return;
-
- s->rbsize = val;
- xfree(s->rbuf_alloc);
- s->rbuf_alloc = xmalloc(val);
- s->rpos = s->rbuf = s->rbuf_alloc;
-
- if ((s->type == SK_UDP) || (s->type == SK_IP))
- sk_set_min_rcvbuf(s, s->rbsize);
-}
-
-void
-sk_set_tbsize(sock *s, uint val)
-{
- ASSERT(s->tbuf_alloc == s->tbuf);
-
- if (s->tbsize == val)
- return;
-
- byte *old_tbuf = s->tbuf;
-
- s->tbsize = val;
- s->tbuf = s->tbuf_alloc = xrealloc(s->tbuf_alloc, val);
- s->tpos = s->tbuf + (s->tpos - old_tbuf);
- s->ttx = s->tbuf + (s->ttx - old_tbuf);
-}
-
-void
-sk_set_tbuf(sock *s, void *tbuf)
-{
- s->tbuf = tbuf ?: s->tbuf_alloc;
- s->ttx = s->tpos = s->tbuf;
-}
-
-void
-sk_reallocate(sock *s)
-{
- sk_free_bufs(s);
- sk_alloc_bufs(s);
-}
-
-static void
-sk_dump(struct dump_request *dreq, resource *r)
-{
- sock *s = (sock *) r;
- static char *sk_type_names[] = { "TCP<", "TCP>", "TCP", "UDP", NULL, "IP", NULL, "MAGIC", "UNIX<", "UNIX", "SSH>", "SSH", "DEL!" };
-
- RDUMP("(%s, ud=%p, sa=%I, sp=%d, da=%I, dp=%d, tos=%d, ttl=%d, if=%s)\n",
- sk_type_names[s->type],
- s->data,
- s->saddr,
- s->sport,
- s->daddr,
- s->dport,
- s->tos,
- s->ttl,
- s->iface ? s->iface->name : "none");
-}
-
-static struct resclass sk_class = {
- "Socket",
- sizeof(sock),
- sk_free,
- sk_dump,
- NULL,
- NULL
-};
-
-/**
- * sk_new - create a socket
- * @p: pool
- *
- * This function creates a new socket resource. If you want to use it,
- * you need to fill in all the required fields of the structure and
- * call sk_open() to do the actual opening of the socket.
- *
- * The real function name is sock_new(); sk_new() is a macro wrapper
- * to avoid collision with OpenSSL.
- */
-sock *
-sock_new(pool *p)
-{
- sock *s = ralloc(p, &sk_class);
- s->pool = p;
- // s->saddr = s->daddr = IPA_NONE;
- s->tos = s->priority = s->ttl = -1;
- s->fd = -1;
- return s;
-}
-
-static int
-sk_setup(sock *s)
-{
- int y = 1;
- int fd = s->fd;
-
- if (s->type == SK_SSH_ACTIVE)
- return 0;
-
- if (fcntl(fd, F_SETFL, O_NONBLOCK) < 0)
- ERR("O_NONBLOCK");
-
- if (!s->af)
- return 0;
-
- if (ipa_nonzero(s->saddr) && !(s->flags & SKF_BIND))
- s->flags |= SKF_PKTINFO;
-
-#ifdef CONFIG_USE_HDRINCL
- if (sk_is_ipv4(s) && (s->type == SK_IP) && (s->flags & SKF_PKTINFO))
- {
- s->flags &= ~SKF_PKTINFO;
- s->flags |= SKF_HDRINCL;
- if (setsockopt(fd, SOL_IP, IP_HDRINCL, &y, sizeof(y)) < 0)
- ERR("IP_HDRINCL");
- }
-#endif
-
- if (s->vrf && (s->vrf != &default_vrf) && !s->iface && (s->type != SK_TCP))
- {
- /* Bind socket to associated VRF interface.
- This is Linux-specific, but so is SO_BINDTODEVICE.
- For accepted TCP sockets it is inherited from the listening one. */
-#ifdef SO_BINDTODEVICE
- struct ifreq ifr = {};
- strcpy(ifr.ifr_name, s->vrf->name);
- if (setsockopt(s->fd, SOL_SOCKET, SO_BINDTODEVICE, &ifr, sizeof(ifr)) < 0)
- ERR("SO_BINDTODEVICE");
-#endif
- }
-
- if (s->iface)
- {
-#ifdef SO_BINDTODEVICE
- struct ifreq ifr = {};
- strcpy(ifr.ifr_name, s->iface->name);
- if (setsockopt(s->fd, SOL_SOCKET, SO_BINDTODEVICE, &ifr, sizeof(ifr)) < 0)
- ERR("SO_BINDTODEVICE");
-#endif
-
-#ifdef CONFIG_UNIX_DONTROUTE
- if (setsockopt(s->fd, SOL_SOCKET, SO_DONTROUTE, &y, sizeof(y)) < 0)
- ERR("SO_DONTROUTE");
-#endif
- }
-
- if (sk_is_ipv4(s))
- {
- if (s->flags & SKF_LADDR_RX)
- if (sk_request_cmsg4_pktinfo(s) < 0)
- return -1;
-
- if (s->flags & SKF_TTL_RX)
- if (sk_request_cmsg4_ttl(s) < 0)
- return -1;
-
- if ((s->type == SK_UDP) || (s->type == SK_IP))
- if (sk_disable_mtu_disc4(s) < 0)
- return -1;
-
- if (s->ttl >= 0)
- if (sk_set_ttl4(s, s->ttl) < 0)
- return -1;
-
- if (s->tos >= 0)
- if (sk_set_tos4(s, s->tos) < 0)
- return -1;
- }
-
- if (sk_is_ipv6(s))
- {
- if ((s->type == SK_TCP_PASSIVE) || (s->type == SK_TCP_ACTIVE) || (s->type == SK_UDP))
- if (setsockopt(fd, SOL_IPV6, IPV6_V6ONLY, &y, sizeof(y)) < 0)
- ERR("IPV6_V6ONLY");
-
- if (s->flags & SKF_LADDR_RX)
- if (sk_request_cmsg6_pktinfo(s) < 0)
- return -1;
-
- if (s->flags & SKF_TTL_RX)
- if (sk_request_cmsg6_ttl(s) < 0)
- return -1;
-
- if ((s->type == SK_UDP) || (s->type == SK_IP))
- if (sk_disable_mtu_disc6(s) < 0)
- return -1;
-
- if (s->ttl >= 0)
- if (sk_set_ttl6(s, s->ttl) < 0)
- return -1;
-
- if (s->tos >= 0)
- if (sk_set_tos6(s, s->tos) < 0)
- return -1;
-
- if ((s->flags & SKF_UDP6_NO_CSUM_RX) && (s->type == SK_UDP))
- if (sk_set_udp6_no_csum_rx(s) < 0)
- return -1;
- }
-
- /* Must be after sk_set_tos4() as setting ToS on Linux also mangles priority */
- if (s->priority >= 0)
- if (sk_set_priority(s, s->priority) < 0)
- return -1;
-
- if ((s->type == SK_UDP) || (s->type == SK_IP))
- sk_set_min_rcvbuf(s, s->rbsize);
-
- return 0;
-}
-
-static int
-sk_connect(sock *s)
-{
- sockaddr sa;
- sockaddr_fill(&sa, s->af, s->daddr, s->iface, s->dport);
- return connect(s->fd, &sa.sa, SA_LEN(sa));
-}
-
-static void
-sk_tcp_connected(sock *s)
-{
- sockaddr sa;
- int sa_len = sizeof(sa);
-
- if ((getsockname(s->fd, &sa.sa, &sa_len) < 0) ||
- (sockaddr_read(&sa, s->af, &s->saddr, &s->iface, &s->sport) < 0))
- log(L_WARN "SOCK: Cannot get local IP address for TCP>");
-
- s->type = SK_TCP;
- sk_alloc_bufs(s);
- s->tx_hook(s);
-}
-
-#ifdef HAVE_LIBSSH
-static void
-sk_ssh_connected(sock *s)
-{
- sk_alloc_bufs(s);
- s->type = SK_SSH;
- s->tx_hook(s);
-}
-#endif
-
-static int
-sk_passive_connected(sock *s, int type)
-{
- sockaddr loc_sa, rem_sa;
- int loc_sa_len = sizeof(loc_sa);
- int rem_sa_len = sizeof(rem_sa);
-
- int fd = accept(s->fd, ((type == SK_TCP) ? &rem_sa.sa : NULL), &rem_sa_len);
- if (fd < 0)
- {
- if ((errno != EINTR) && (errno != EAGAIN))
- s->err_hook(s, errno);
- return 0;
- }
-
- struct domain_generic *sock_lock = DG_IS_LOCKED(s->pool->domain) ? NULL : s->pool->domain;
- if (sock_lock)
- DG_LOCK(sock_lock);
-
- sock *t = sk_new(s->pool);
- t->type = type;
- t->data = s->data;
- t->af = s->af;
- t->fd = fd;
- t->ttl = s->ttl;
- t->tos = s->tos;
- t->vrf = s->vrf;
- t->rbsize = s->rbsize;
- t->tbsize = s->tbsize;
-
- if (type == SK_TCP)
- {
- if ((getsockname(fd, &loc_sa.sa, &loc_sa_len) < 0) ||
- (sockaddr_read(&loc_sa, s->af, &t->saddr, &t->iface, &t->sport) < 0))
- log(L_WARN "SOCK: Cannot get local IP address for TCP<");
-
- if (sockaddr_read(&rem_sa, s->af, &t->daddr, &t->iface, &t->dport) < 0)
- log(L_WARN "SOCK: Cannot get remote IP address for TCP<");
- }
-
- if (sk_setup(t) < 0)
- {
- /* FIXME: Call err_hook instead ? */
- log(L_ERR "SOCK: Incoming connection: %s%#m", t->err);
-
- /* FIXME: handle it better in rfree() */
- close(t->fd);
- t->fd = -1;
- sk_close(t);
- t = NULL;
- }
- else
- {
- birdloop_add_socket(s->loop, t);
- sk_alloc_bufs(t);
- }
-
- if (sock_lock)
- DG_UNLOCK(sock_lock);
-
- if (t)
- s->rx_hook(t, 0);
-
- return 1;
-}
-
-#ifdef HAVE_LIBSSH
-/*
- * Return SSH_OK or SSH_AGAIN or SSH_ERROR
- */
-static int
-sk_ssh_connect(sock *s)
-{
- s->fd = ssh_get_fd(s->ssh->session);
-
- /* Big fall-through automaton */
- switch (s->ssh->state)
- {
- case SK_SSH_CONNECT:
- {
- switch (ssh_connect(s->ssh->session))
- {
- case SSH_AGAIN:
- /* A quick look into libSSH shows that ssh_get_fd() should return non-(-1)
- * after SSH_AGAIN is returned by ssh_connect(). This is, however, nowhere
- * documented, but our code relies on it.
- */
- return SSH_AGAIN;
-
- case SSH_OK:
- break;
-
- default:
- return SSH_ERROR;
- }
- } /* fallthrough */
-
- case SK_SSH_SERVER_KNOWN:
- {
- s->ssh->state = SK_SSH_SERVER_KNOWN;
-
- if (s->ssh->server_hostkey_path)
- {
- int server_identity_is_ok = 1;
-
-#ifdef HAVE_SSH_OLD_SERVER_VALIDATION_API
-#define ssh_session_is_known_server ssh_is_server_known
-#define SSH_KNOWN_HOSTS_OK SSH_SERVER_KNOWN_OK
-#define SSH_KNOWN_HOSTS_UNKNOWN SSH_SERVER_NOT_KNOWN
-#define SSH_KNOWN_HOSTS_CHANGED SSH_SERVER_KNOWN_CHANGED
-#define SSH_KNOWN_HOSTS_NOT_FOUND SSH_SERVER_FILE_NOT_FOUND
-#define SSH_KNOWN_HOSTS_ERROR SSH_SERVER_ERROR
-#define SSH_KNOWN_HOSTS_OTHER SSH_SERVER_FOUND_OTHER
-#endif
-
- /* Check server identity */
- switch (ssh_session_is_known_server(s->ssh->session))
- {
-#define LOG_WARN_ABOUT_SSH_SERVER_VALIDATION(s,msg,args...) log(L_WARN "SSH Identity %s@%s:%u: " msg, (s)->ssh->username, (s)->host, (s)->dport, ## args);
- case SSH_KNOWN_HOSTS_OK:
- /* The server is known and has not changed. */
- break;
-
- case SSH_KNOWN_HOSTS_UNKNOWN:
- LOG_WARN_ABOUT_SSH_SERVER_VALIDATION(s, "The server is unknown, its public key was not found in the known host file %s", s->ssh->server_hostkey_path);
- server_identity_is_ok = 0;
- break;
-
- case SSH_KNOWN_HOSTS_CHANGED:
- LOG_WARN_ABOUT_SSH_SERVER_VALIDATION(s, "The server key has changed. Either you are under attack or the administrator changed the key.");
- server_identity_is_ok = 0;
- break;
-
- case SSH_KNOWN_HOSTS_NOT_FOUND:
- LOG_WARN_ABOUT_SSH_SERVER_VALIDATION(s, "The known host file %s does not exist", s->ssh->server_hostkey_path);
- server_identity_is_ok = 0;
- break;
-
- case SSH_KNOWN_HOSTS_ERROR:
- LOG_WARN_ABOUT_SSH_SERVER_VALIDATION(s, "Some error happened");
- server_identity_is_ok = 0;
- break;
-
- case SSH_KNOWN_HOSTS_OTHER:
- LOG_WARN_ABOUT_SSH_SERVER_VALIDATION(s, "The server gave us a key of a different type from the one we had recorded. " \
- "It is a possible attack.");
- server_identity_is_ok = 0;
- break;
- }
-
- if (!server_identity_is_ok)
- return SSH_ERROR;
- }
- } /* fallthrough */
-
- case SK_SSH_USERAUTH:
- {
- s->ssh->state = SK_SSH_USERAUTH;
- switch (ssh_userauth_publickey_auto(s->ssh->session, NULL, NULL))
- {
- case SSH_AUTH_AGAIN:
- return SSH_AGAIN;
-
- case SSH_AUTH_SUCCESS:
- break;
-
- default:
- return SSH_ERROR;
- }
- } /* fallthrough */
-
- case SK_SSH_CHANNEL:
- {
- s->ssh->state = SK_SSH_CHANNEL;
- s->ssh->channel = ssh_channel_new(s->ssh->session);
- if (s->ssh->channel == NULL)
- return SSH_ERROR;
- } /* fallthrough */
-
- case SK_SSH_SESSION:
- {
- s->ssh->state = SK_SSH_SESSION;
- switch (ssh_channel_open_session(s->ssh->channel))
- {
- case SSH_AGAIN:
- return SSH_AGAIN;
-
- case SSH_OK:
- break;
-
- default:
- return SSH_ERROR;
- }
- } /* fallthrough */
-
- case SK_SSH_SUBSYSTEM:
- {
- s->ssh->state = SK_SSH_SUBSYSTEM;
- if (s->ssh->subsystem)
- {
- switch (ssh_channel_request_subsystem(s->ssh->channel, s->ssh->subsystem))
- {
- case SSH_AGAIN:
- return SSH_AGAIN;
-
- case SSH_OK:
- break;
-
- default:
- return SSH_ERROR;
- }
- }
- } /* fallthrough */
-
- case SK_SSH_ESTABLISHED:
- s->ssh->state = SK_SSH_ESTABLISHED;
- }
-
- return SSH_OK;
-}
-
-/*
- * Return file descriptor number if success
- * Return -1 if failed
- */
-static int
-sk_open_ssh(sock *s)
-{
- if (!s->ssh)
- bug("sk_open() sock->ssh is not allocated");
-
- ssh_session sess = ssh_new();
- if (sess == NULL)
- ERR2("Cannot create a ssh session");
- s->ssh->session = sess;
-
- const int verbosity = SSH_LOG_NOLOG;
- ssh_options_set(sess, SSH_OPTIONS_LOG_VERBOSITY, &verbosity);
- ssh_options_set(sess, SSH_OPTIONS_HOST, s->host);
- ssh_options_set(sess, SSH_OPTIONS_PORT, &(s->dport));
- /* TODO: Add SSH_OPTIONS_BINDADDR */
- ssh_options_set(sess, SSH_OPTIONS_USER, s->ssh->username);
-
- if (s->ssh->server_hostkey_path)
- ssh_options_set(sess, SSH_OPTIONS_KNOWNHOSTS, s->ssh->server_hostkey_path);
-
- if (s->ssh->client_privkey_path)
- ssh_options_set(sess, SSH_OPTIONS_IDENTITY, s->ssh->client_privkey_path);
-
- ssh_set_blocking(sess, 0);
-
- switch (sk_ssh_connect(s))
- {
- case SSH_AGAIN:
- break;
-
- case SSH_OK:
- sk_ssh_connected(s);
- break;
-
- case SSH_ERROR:
- ERR2(ssh_get_error(sess));
- break;
- }
-
- return ssh_get_fd(sess);
-
- err:
- return -1;
-}
-#endif
-
-/**
- * sk_open - open a socket
- * @s: socket
- * @loop: event loop to attach the socket to
- *
- * This function takes a socket resource created by sk_new() and
- * initialized by the user and binds a corresponding network connection
- * to it.
- *
- * Result: 0 for success, -1 for an error.
- */
-int
-sk_open(sock *s, struct birdloop *loop)
-{
- int af = AF_UNSPEC;
- int fd = -1;
- int do_bind = 0;
- int bind_port = 0;
- ip_addr bind_addr = IPA_NONE;
- sockaddr sa;
-
- if (s->type <= SK_IP)
- {
- /*
- * For TCP/IP sockets, address family (IPv4 or IPv6) can be specified either
- * explicitly (SK_IPV4 or SK_IPV6) or implicitly (based on saddr, daddr).
- * But the specifications have to be consistent.
- */
-
- switch (s->subtype)
- {
- case 0:
- ASSERT(ipa_zero(s->saddr) || ipa_zero(s->daddr) ||
- (ipa_is_ip4(s->saddr) == ipa_is_ip4(s->daddr)));
- af = (ipa_is_ip4(s->saddr) || ipa_is_ip4(s->daddr)) ? AF_INET : AF_INET6;
- break;
-
- case SK_IPV4:
- ASSERT(ipa_zero(s->saddr) || ipa_is_ip4(s->saddr));
- ASSERT(ipa_zero(s->daddr) || ipa_is_ip4(s->daddr));
- af = AF_INET;
- break;
-
- case SK_IPV6:
- ASSERT(ipa_zero(s->saddr) || !ipa_is_ip4(s->saddr));
- ASSERT(ipa_zero(s->daddr) || !ipa_is_ip4(s->daddr));
- af = AF_INET6;
- break;
-
- default:
- bug("Invalid subtype %d", s->subtype);
- }
- }
-
- switch (s->type)
- {
- case SK_TCP_ACTIVE:
- s->ttx = ""; /* Force s->ttx != s->tpos */
- /* Fall thru */
- case SK_TCP_PASSIVE:
- fd = socket(af, SOCK_STREAM, IPPROTO_TCP);
- bind_port = s->sport;
- bind_addr = s->saddr;
- do_bind = bind_port || ipa_nonzero(bind_addr);
- break;
-
-#ifdef HAVE_LIBSSH
- case SK_SSH_ACTIVE:
- s->ttx = ""; /* Force s->ttx != s->tpos */
- fd = sk_open_ssh(s);
- break;
-#endif
-
- case SK_UDP:
- fd = socket(af, SOCK_DGRAM, IPPROTO_UDP);
- bind_port = s->sport;
- bind_addr = (s->flags & SKF_BIND) ? s->saddr : IPA_NONE;
- do_bind = 1;
- break;
-
- case SK_IP:
- fd = socket(af, SOCK_RAW, s->dport);
- bind_port = 0;
- bind_addr = (s->flags & SKF_BIND) ? s->saddr : IPA_NONE;
- do_bind = ipa_nonzero(bind_addr);
- break;
-
- case SK_MAGIC:
- af = 0;
- fd = s->fd;
- break;
-
- default:
- bug("sk_open() called for invalid sock type %d", s->type);
- }
-
- if (fd < 0)
- ERR("socket");
-
- s->af = af;
- s->fd = fd;
-
- if (sk_setup(s) < 0)
- goto err;
-
- if (do_bind)
- {
- if (bind_port)
- {
- int y = 1;
-
- if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &y, sizeof(y)) < 0)
- ERR2("SO_REUSEADDR");
-
-#ifdef CONFIG_NO_IFACE_BIND
- /* Workaround missing ability to bind to an iface */
- if ((s->type == SK_UDP) && s->iface && ipa_zero(bind_addr))
- {
- if (setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &y, sizeof(y)) < 0)
- ERR2("SO_REUSEPORT");
- }
-#endif
- }
- else
- if (s->flags & SKF_HIGH_PORT)
- if (sk_set_high_port(s) < 0)
- log(L_WARN "Socket error: %s%#m", s->err);
-
- if (s->flags & SKF_FREEBIND)
- if (sk_set_freebind(s) < 0)
- log(L_WARN "Socket error: %s%#m", s->err);
-
- sockaddr_fill(&sa, s->af, bind_addr, s->iface, bind_port);
- if (bind(fd, &sa.sa, SA_LEN(sa)) < 0)
- ERR2("bind");
- }
-
- if (s->password)
- if (sk_set_md5_auth(s, s->saddr, s->daddr, -1, s->iface, s->password, 0) < 0)
- goto err;
-
- switch (s->type)
- {
- case SK_TCP_ACTIVE:
- if (sk_connect(s) >= 0)
- sk_tcp_connected(s);
- else if (errno != EINTR && errno != EAGAIN && errno != EINPROGRESS &&
- errno != ECONNREFUSED && errno != EHOSTUNREACH && errno != ENETUNREACH)
- ERR2("connect");
- break;
-
- case SK_TCP_PASSIVE:
- if (listen(fd, 8) < 0)
- ERR2("listen");
- break;
-
- case SK_UDP:
- if (s->flags & SKF_CONNECT)
- if (sk_connect(s) < 0)
- ERR2("connect");
-
- sk_alloc_bufs(s);
- break;
-
- case SK_SSH_ACTIVE:
- case SK_MAGIC:
- break;
-
- default:
- sk_alloc_bufs(s);
- }
-
- birdloop_add_socket(loop, s);
- return 0;
-
-err:
- close(fd);
- s->fd = -1;
- return -1;
-}
-
-int
-sk_open_unix(sock *s, struct birdloop *loop, const char *name)
-{
- struct sockaddr_un sa;
- int fd;
-
- /* We are sloppy during error (leak fd and not set s->err), but we die anyway */
-
- fd = socket(AF_UNIX, SOCK_STREAM, 0);
- if (fd < 0)
- return -1;
-
- if (fcntl(fd, F_SETFL, O_NONBLOCK) < 0)
- return -1;
-
- /* Path length checked in test_old_bird() but we may need unix sockets for other reasons in future */
- ASSERT_DIE(strlen(name) < sizeof(sa.sun_path));
-
- sa.sun_family = AF_UNIX;
- strcpy(sa.sun_path, name);
-
- if (bind(fd, (struct sockaddr *) &sa, SUN_LEN(&sa)) < 0)
- return -1;
-
- if (listen(fd, 8) < 0)
- return -1;
-
- s->fd = fd;
- birdloop_add_socket(loop, s);
- return 0;
-}
-
-
-#define CMSG_RX_SPACE MAX(CMSG4_SPACE_PKTINFO+CMSG4_SPACE_TTL, \
- CMSG6_SPACE_PKTINFO+CMSG6_SPACE_TTL)
-#define CMSG_TX_SPACE MAX(CMSG4_SPACE_PKTINFO,CMSG6_SPACE_PKTINFO)
-
-static void
-sk_prepare_cmsgs(sock *s, struct msghdr *msg, void *cbuf, size_t cbuflen)
-{
- if (sk_is_ipv4(s))
- sk_prepare_cmsgs4(s, msg, cbuf, cbuflen);
- else
- sk_prepare_cmsgs6(s, msg, cbuf, cbuflen);
-}
-
-static void
-sk_process_cmsgs(sock *s, struct msghdr *msg)
-{
- struct cmsghdr *cm;
-
- s->laddr = IPA_NONE;
- s->lifindex = 0;
- s->rcv_ttl = -1;
-
- for (cm = CMSG_FIRSTHDR(msg); cm != NULL; cm = CMSG_NXTHDR(msg, cm))
- {
- if ((cm->cmsg_level == SOL_IP) && sk_is_ipv4(s))
- {
- sk_process_cmsg4_pktinfo(s, cm);
- sk_process_cmsg4_ttl(s, cm);
- }
-
- if ((cm->cmsg_level == SOL_IPV6) && sk_is_ipv6(s))
- {
- sk_process_cmsg6_pktinfo(s, cm);
- sk_process_cmsg6_ttl(s, cm);
- }
- }
-}
-
-
-static inline int
-sk_sendmsg(sock *s)
-{
- struct iovec iov = {s->tbuf, s->tpos - s->tbuf};
- byte cmsg_buf[CMSG_TX_SPACE];
- sockaddr dst;
- int flags = 0;
-
- sockaddr_fill(&dst, s->af, s->daddr, s->iface, s->dport);
-
- struct msghdr msg = {
- .msg_name = &dst.sa,
- .msg_namelen = SA_LEN(dst),
- .msg_iov = &iov,
- .msg_iovlen = 1
- };
-
-#ifdef CONFIG_DONTROUTE_UNICAST
- /* FreeBSD silently changes TTL to 1 when MSG_DONTROUTE is used, therefore we
- cannot use it for other cases (e.g. when TTL security is used). */
- if (ipa_is_ip4(s->daddr) && ip4_is_unicast(ipa_to_ip4(s->daddr)) && (s->ttl == 1))
- flags = MSG_DONTROUTE;
-#endif
-
-#ifdef CONFIG_USE_HDRINCL
- byte hdr[20];
- struct iovec iov2[2] = { {hdr, 20}, iov };
-
- if (s->flags & SKF_HDRINCL)
- {
- sk_prepare_ip_header(s, hdr, iov.iov_len);
- msg.msg_iov = iov2;
- msg.msg_iovlen = 2;
- }
-#endif
-
- if (s->flags & SKF_PKTINFO)
- sk_prepare_cmsgs(s, &msg, cmsg_buf, sizeof(cmsg_buf));
-
- return sendmsg(s->fd, &msg, flags);
-}
-
-static inline int
-sk_recvmsg(sock *s)
-{
- struct iovec iov = {s->rbuf, s->rbsize};
- byte cmsg_buf[CMSG_RX_SPACE];
- sockaddr src;
-
- struct msghdr msg = {
- .msg_name = &src.sa,
- .msg_namelen = sizeof(src), // XXXX ??
- .msg_iov = &iov,
- .msg_iovlen = 1,
- .msg_control = cmsg_buf,
- .msg_controllen = sizeof(cmsg_buf),
- .msg_flags = 0
- };
-
- int rv = recvmsg(s->fd, &msg, 0);
- if (rv < 0)
- return rv;
-
- //ifdef IPV4
- // if (cf_type == SK_IP)
- // rv = ipv4_skip_header(pbuf, rv);
- //endif
-
- sockaddr_read(&src, s->af, &s->faddr, NULL, &s->fport);
- sk_process_cmsgs(s, &msg);
-
- if (msg.msg_flags & MSG_TRUNC)
- s->flags |= SKF_TRUNCATED;
- else
- s->flags &= ~SKF_TRUNCATED;
-
- return rv;
-}
-
-
-static inline void reset_tx_buffer(sock *s) { s->ttx = s->tpos = s->tbuf; }
-
-bool
-sk_tx_pending(sock *s)
-{
- return s->ttx != s->tpos;
-}
-
-
-static int
-sk_maybe_write(sock *s)
-{
- int e;
-
- switch (s->type)
- {
- case SK_TCP:
- case SK_MAGIC:
- case SK_UNIX:
- while (sk_tx_pending(s))
- {
- e = write(s->fd, s->ttx, s->tpos - s->ttx);
-
- if (e < 0)
- {
- if (errno != EINTR && errno != EAGAIN)
- {
- reset_tx_buffer(s);
- /* EPIPE is just a connection close notification during TX */
- s->err_hook(s, (errno != EPIPE) ? errno : 0);
- return -1;
- }
- return 0;
- }
- s->ttx += e;
- }
- reset_tx_buffer(s);
- return 1;
-
-#ifdef HAVE_LIBSSH
- case SK_SSH:
- while (sk_tx_pending(s))
- {
- e = ssh_channel_write(s->ssh->channel, s->ttx, s->tpos - s->ttx);
-
- if (e < 0)
- {
- s->err = ssh_get_error(s->ssh->session);
- s->err_hook(s, ssh_get_error_code(s->ssh->session));
-
- reset_tx_buffer(s);
- /* EPIPE is just a connection close notification during TX */
- s->err_hook(s, (errno != EPIPE) ? errno : 0);
- return -1;
- }
- s->ttx += e;
- }
- reset_tx_buffer(s);
- return 1;
-#endif
-
- case SK_UDP:
- case SK_IP:
- {
- if (s->tbuf == s->tpos)
- return 1;
-
- e = sk_sendmsg(s);
-
- if (e < 0)
- {
- if (errno != EINTR && errno != EAGAIN)
- {
- reset_tx_buffer(s);
- s->err_hook(s, errno);
- return -1;
- }
-
- if (!s->tx_hook)
- reset_tx_buffer(s);
- return 0;
- }
- reset_tx_buffer(s);
- return 1;
- }
-
- default:
- bug("sk_maybe_write: unknown socket type %d", s->type);
- }
-}
-
-int
-sk_rx_ready(sock *s)
-{
- int rv;
- struct pollfd pfd = { .fd = s->fd };
- pfd.events |= POLLIN;
-
- redo:
- rv = poll(&pfd, 1, 0);
-
- if ((rv < 0) && (errno == EINTR || errno == EAGAIN))
- goto redo;
-
- return rv;
-}
-
-/**
- * sk_send - send data to a socket
- * @s: socket
- * @len: number of bytes to send
- *
- * This function sends @len bytes of data prepared in the
- * transmit buffer of the socket @s to the network connection.
- * If the packet can be sent immediately, it does so and returns
- * 1, else it queues the packet for later processing, returns 0
- * and calls the @tx_hook of the socket when the transmission
- * takes place.
- */
-int
-sk_send(sock *s, unsigned len)
-{
- s->ttx = s->tbuf;
- s->tpos = s->tbuf + len;
-
- int e = sk_maybe_write(s);
- if (e == 0) /* Trigger thread poll reload to poll this socket's write. */
- socket_changed(s);
-
- return e;
-}
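/*
 * Illustrative sketch, not part of the patch: typical sk_send() usage.
 * The message and the function name are hypothetical; the tbuf/tbsize fields
 * and the 1 / 0 / -1 return contract are as documented above.
 */
static void
example_send_hello(sock *s)
{
  int len = bsnprintf((char *) s->tbuf, s->tbsize, "HELLO\n");
  if (len <= 0)
    return;

  switch (sk_send(s, len))
  {
  case 1:	/* written immediately */
    break;
  case 0:	/* queued; s->tx_hook will be called when the data has been sent */
    break;
  default:	/* error already reported through s->err_hook */
    break;
  }
}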
-
-/**
- * sk_send_to - send data to a specific destination
- * @s: socket
- * @len: number of bytes to send
- * @addr: IP address to send the packet to
- * @port: port to send the packet to
- *
- * This is a sk_send() replacement for connection-less packet sockets
- * which allows destination of the packet to be chosen dynamically.
- * Raw IP sockets should use 0 for @port.
+#if 0
+/**********
+ * An internal event log kept only for the mainloop makes no sense.
+ * To be replaced by a lockless event log keeping much more information
+ * about events throughout all the threads.
*/
-int
-sk_send_to(sock *s, unsigned len, ip_addr addr, unsigned port)
-{
- s->daddr = addr;
- if (port)
- s->dport = port;
-
- s->ttx = s->tbuf;
- s->tpos = s->tbuf + len;
- return sk_maybe_write(s);
-}
-
-/*
-int
-sk_send_full(sock *s, unsigned len, struct iface *ifa,
- ip_addr saddr, ip_addr daddr, unsigned dport)
-{
- s->iface = ifa;
- s->saddr = saddr;
- s->daddr = daddr;
- s->dport = dport;
- s->ttx = s->tbuf;
- s->tpos = s->tbuf + len;
- return sk_maybe_write(s);
-}
-*/
-
-static void
-call_rx_hook(sock *s, int size)
-{
- if (s->rx_hook(s, size))
- {
- /* We need to be careful since the socket could have been deleted by the hook */
- if (s->loop->sock_active == s)
- s->rpos = s->rbuf;
- }
-}
-
-#ifdef HAVE_LIBSSH
-static int
-sk_read_ssh(sock *s)
-{
- ssh_channel rchans[2] = { s->ssh->channel, NULL };
- struct timeval timev = { 1, 0 };
-
- if (ssh_channel_select(rchans, NULL, NULL, &timev) == SSH_EINTR)
- return 1; /* Try again */
-
- if (ssh_channel_is_eof(s->ssh->channel) != 0)
- {
- /* The remote side is closing the connection */
- s->err_hook(s, 0);
- return 0;
- }
-
- if (rchans[0] == NULL)
- return 0; /* No data is available on the socket */
-
- const uint used_bytes = s->rpos - s->rbuf;
- const int read_bytes = ssh_channel_read_nonblocking(s->ssh->channel, s->rpos, s->rbsize - used_bytes, 0);
- if (read_bytes > 0)
- {
- /* Received data */
- s->rpos += read_bytes;
- call_rx_hook(s, used_bytes + read_bytes);
- return 1;
- }
- else if (read_bytes == 0)
- {
- if (ssh_channel_is_eof(s->ssh->channel) != 0)
- {
- /* The remote side is closing the connection */
- s->err_hook(s, 0);
- }
- }
- else
- {
- s->err = ssh_get_error(s->ssh->session);
- s->err_hook(s, ssh_get_error_code(s->ssh->session));
- }
-
- return 0; /* No data is available on the socket */
-}
-#endif
-
- /* sk_read() and sk_write() are called from BFD's event loop */
-
-static inline int
-sk_read_noflush(sock *s, int revents)
-{
- switch (s->type)
- {
- case SK_TCP_PASSIVE:
- return sk_passive_connected(s, SK_TCP);
-
- case SK_UNIX_PASSIVE:
- return sk_passive_connected(s, SK_UNIX);
-
- case SK_TCP:
- case SK_UNIX:
- {
- int c = read(s->fd, s->rpos, s->rbuf + s->rbsize - s->rpos);
-
- if (c < 0)
- {
- if (errno != EINTR && errno != EAGAIN)
- s->err_hook(s, errno);
- else if (errno == EAGAIN && !(revents & POLLIN))
- {
- log(L_ERR "Got EAGAIN from read when revents=%x (without POLLIN)", revents);
- s->err_hook(s, 0);
- }
- }
- else if (!c)
- s->err_hook(s, 0);
- else
- {
- s->rpos += c;
- call_rx_hook(s, s->rpos - s->rbuf);
- return 1;
- }
- return 0;
- }
-
-#ifdef HAVE_LIBSSH
- case SK_SSH:
- return sk_read_ssh(s);
-#endif
-
- case SK_MAGIC:
- return s->rx_hook(s, 0);
-
- default:
- {
- int e = sk_recvmsg(s);
-
- if (e < 0)
- {
- if (errno != EINTR && errno != EAGAIN)
- s->err_hook(s, errno);
- return 0;
- }
-
- s->rpos = s->rbuf + e;
- s->rx_hook(s, e);
- return 1;
- }
- }
-}
-
-int
-sk_read(sock *s, int revents)
-{
- int e = sk_read_noflush(s, revents);
- tmp_flush();
- return e;
-}
-
-static inline int
-sk_write_noflush(sock *s)
-{
- switch (s->type)
- {
- case SK_TCP_ACTIVE:
- {
- if (sk_connect(s) >= 0 || errno == EISCONN)
- sk_tcp_connected(s);
- else if (errno != EINTR && errno != EAGAIN && errno != EINPROGRESS)
- s->err_hook(s, errno);
- return 0;
- }
-
-#ifdef HAVE_LIBSSH
- case SK_SSH_ACTIVE:
- {
- switch (sk_ssh_connect(s))
- {
- case SSH_OK:
- sk_ssh_connected(s);
- break;
-
- case SSH_AGAIN:
- return 1;
-
- case SSH_ERROR:
- s->err = ssh_get_error(s->ssh->session);
- s->err_hook(s, ssh_get_error_code(s->ssh->session));
- break;
- }
- return 0;
- }
-#endif
-
- default:
- if (sk_tx_pending(s) && sk_maybe_write(s) > 0)
- {
- if (s->tx_hook)
- s->tx_hook(s);
- return 1;
- }
- return 0;
- }
-}
-
-int
-sk_write(sock *s)
-{
- int e = sk_write_noflush(s);
- tmp_flush();
- return e;
-}
-
-int sk_is_ipv4(sock *s)
-{ return s->af == AF_INET; }
-
-int sk_is_ipv6(sock *s)
-{ return s->af == AF_INET6; }
-
-void
-sk_err(sock *s, int revents)
-{
- int se = 0, sse = sizeof(se);
- if ((s->type != SK_MAGIC) && (revents & POLLERR))
- if (getsockopt(s->fd, SOL_SOCKET, SO_ERROR, &se, &sse) < 0)
- {
- log(L_ERR "IO: Socket error: SO_ERROR: %m");
- se = 0;
- }
-
- s->err_hook(s, se);
- tmp_flush();
-}
-
-void
-sk_dump_all(struct dump_request *dreq)
-{
- node *n;
- sock *s;
-
- RDUMP("Open sockets:\n");
- dreq->indent += 3;
- WALK_LIST(n, main_birdloop.sock_list)
- {
- s = SKIP_BACK(sock, n, n);
- RDUMP("%p ", s);
- sk_dump(dreq, &s->r);
- }
- dreq->indent -= 3;
- RDUMP("\n");
-}
-
/*
* Internal event log and watchdog
}
}
+#endif
+
+static btime last_io_time, loop_time;
+static int watchdog_active;
+
void
watchdog_sigalrm(int sig UNUSED)
{
/* Update last_io_time and duration, but skip latency check */
struct global_runtime *gr = atomic_load_explicit(&global_runtime, memory_order_relaxed);
gr->latency_limit = 0xffffffff;
- io_update_time();
+
+ last_io_time = current_time_now();
debug_safe("Watchdog timer timed out\n");
static inline void
watchdog_start1(void)
{
- io_update_time();
-
- loop_time = last_io_time;
+ loop_time = last_io_time = current_time_now();
}
static inline void
watchdog_start(void)
{
- io_update_time();
+ loop_time = last_io_time = current_time_now();
+// event_log_num = 0;
- loop_time = last_io_time;
- event_log_num = 0;
-
- struct global_runtime *gr = atomic_load_explicit(&global_runtime, memory_order_relaxed);
+ union bird_global_runtime *gr = BIRD_GLOBAL_RUNTIME;
if (gr->watchdog_timeout)
{
alarm(gr->watchdog_timeout);
static inline void
watchdog_stop(void)
{
- io_update_time();
+ last_io_time = current_time_now();
if (watchdog_active)
{
}
btime duration = last_io_time - loop_time;
- struct global_runtime *gr = atomic_load_explicit(&global_runtime, memory_order_relaxed);
+ union bird_global_runtime *gr = BIRD_GLOBAL_RUNTIME;
+ /*
if (duration > gr->watchdog_warning)
log(L_WARN "I/O loop cycle took %u.%03u ms for %d events",
(uint) (duration TO_MS), (uint) (duration % 1000), event_log_num);
+ */
+
+ if (duration > gr->watchdog_warning)
+ log(L_WARN "I/O loop cycle took %u.%03u ms",
+ (uint) (duration TO_MS), (uint) (duration % 1000));
}
#define SHORT_LOOP_MAX 10
#define WORK_EVENTS_MAX 10
-sock *stored_sock;
+extern sock *stored_sock; /* mainloop hack */
+
+int sk_read(sock *s, int revents);
+int sk_write(sock *s);
+void sk_err(sock *s, int revents);
void
io_loop(void)
ev_run_list(&global_event_list);
ev_run_list_limited(&global_work_list, WORK_EVENTS_MAX);
ev_run_list(&main_birdloop.event_list);
- timers_fire(&main_birdloop.time, 1);
- io_close_event();
+ timers_fire(&main_birdloop.time);
+// io_close_event();
events =
!ev_list_empty(&global_event_list) ||
if (async_config_flag)
{
- io_log_event(async_config, NULL, DL_EVENTS);
+// io_log_event(async_config, NULL, DL_EVENTS);
async_config();
async_config_flag = 0;
continue;
}
if (async_dump_flag)
{
- io_log_event(async_dump, NULL, DL_EVENTS);
+// io_log_event(async_dump, NULL, DL_EVENTS);
async_dump();
async_dump_flag = 0;
continue;
}
if (async_shutdown_flag)
{
- io_log_event(async_shutdown, NULL, DL_EVENTS);
+// io_log_event(async_shutdown, NULL, DL_EVENTS);
async_shutdown();
async_shutdown_flag = 0;
continue;
do
{
steps--;
- io_log_event(s->rx_hook, s->data, DL_SOCKETS);
+// io_log_event(s->rx_hook, s->data, DL_SOCKETS);
e = sk_read(s, pfd.pfd.data[s->index].revents);
}
while (e && (main_birdloop.sock_active == s) && s->rx_hook && steps);
do
{
steps--;
- io_log_event(s->tx_hook, s->data, DL_SOCKETS);
+// io_log_event(s->tx_hook, s->data, DL_SOCKETS);
e = sk_write(s);
}
while (e && (main_birdloop.sock_active == s) && steps);
if (!s->fast_rx && (pfd.pfd.data[s->index].revents & POLLIN) && s->rx_hook)
{
count++;
- io_log_event(s->rx_hook, s->data, DL_SOCKETS);
+// io_log_event(s->rx_hook, s->data, DL_SOCKETS);
sk_read(s, pfd.pfd.data[s->index].revents);
if (s != main_birdloop.sock_active)
continue;
close(fd);
}
-
-/*
- * DNS resolver
- */
-
-ip_addr
-resolve_hostname(const char *host, int type, const char **err_msg)
-{
- struct addrinfo *res;
- struct addrinfo hints = {
- .ai_family = AF_UNSPEC,
- .ai_socktype = (type == SK_UDP) ? SOCK_DGRAM : SOCK_STREAM,
- .ai_flags = AI_ADDRCONFIG,
- };
-
- *err_msg = NULL;
-
- int err_code = getaddrinfo(host, NULL, &hints, &res);
- if (err_code != 0)
- {
- *err_msg = gai_strerror(err_code);
- return IPA_NONE;
- }
-
- ip_addr addr = IPA_NONE;
- uint unused;
-
- sockaddr_read((sockaddr *) res->ai_addr, res->ai_family, &addr, NULL, &unused);
- freeaddrinfo(res);
-
- return addr;
-}
#define THIS_KRT ((struct krt_config *) this_proto)
#define THIS_KIF ((struct kif_config *) this_proto)
-#define KIF_IFACE ((struct kif_iface_config *) this_ipatt)
+#define KIF_IFACE ((struct iface_config *) this_ipatt)
static void
kif_set_preferred(ip_addr ip)
kif_iface_start:
{
- this_ipatt = cfg_allocz(sizeof(struct kif_iface_config));
+ this_ipatt = cfg_allocz(sizeof(struct iface_config));
add_tail(&THIS_KIF->iface_list, NODE this_ipatt);
init_list(&this_ipatt->ipn_list);
}
static timer *kif_scan_timer;
static btime kif_last_shot;
-static struct kif_iface_config kif_default_iface = {};
+static struct iface_config kif_default_iface = {};
-struct kif_iface_config *
+struct iface_config *
kif_get_iface_config(struct iface *iface)
{
struct kif_config *cf = (void *) (kif_proto->p.cf);
- struct kif_iface_config *ic = (void *) iface_patt_find(&cf->iface_list, iface, NULL);
+ struct iface_config *ic = (void *) iface_patt_find(&cf->iface_list, iface, NULL);
return ic ?: &kif_default_iface;
}
struct kif_config *s = (struct kif_config *) src;
/* Copy interface config list */
- cfg_copy_list(&d->iface_list, &s->iface_list, sizeof(struct kif_iface_config));
+ cfg_copy_list(&d->iface_list, &s->iface_list, sizeof(struct iface_config));
/* Fix sysdep parts */
kif_sys_copy_config(d, s);
struct proto_config c;
struct kif_params sys; /* Sysdep params */
- list iface_list; /* List of iface configs (struct kif_iface_config) */
+ list iface_list; /* List of iface configs (struct iface_config) */
btime scan_time; /* How often we re-scan interfaces */
};
-struct kif_iface_config {
- struct iface_patt i;
-
- ip_addr pref_v4;
- ip_addr pref_v6;
- ip_addr pref_ll;
-};
-
struct kif_proto {
struct proto p;
struct kif_state sys; /* Sysdep state */
#define KIF_CF ((struct kif_config *)p->p.cf)
-struct kif_iface_config * kif_get_iface_config(struct iface *iface);
+struct iface_config * kif_get_iface_config(struct iface *iface);
struct proto_config * krt_init_config(int class);
void kif_do_scan(struct kif_proto *);
-int kif_update_sysdep_addr(struct iface *i);
-
#endif
* user's manual.
*/
+#include <netdb.h>
#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdarg.h>
+#include <sys/types.h>
+#include <sys/socket.h>
#include <time.h>
#include <unistd.h>
#include <errno.h>
static struct rfile *dbg_rf;
static char *current_syslog_name = NULL; /* NULL -> syslog closed */
+const char *bird_name = NULL;
_Atomic uint max_thread_id = 1;
_Thread_local uint this_thread_id;
if (ipa_zero(lc->udp_ip))
{
- cf_warn("Cannot resolve hostname '%s': %s", l->udp_host, err_msg);
+ log(L_WARN "Cannot resolve hostname '%s': %s", l->udp_host, err_msg);
goto resolve_fail;
}
}
if (sk_open(sk, &main_birdloop) < 0)
{
- cf_warn("Cannot open UDP log socket: %s%#m", sk->err);
+ log(L_WARN "Cannot open UDP log socket: %s%#m", sk->err);
rfree(sk);
resolve_fail:
log_lock();
void
log_init_debug(char *f)
{
+ ASSERT_DIE(bird_name);
clock_gettime(CLOCK_MONOTONIC, &dbg_time_start);
if (dbg_rf && dbg_rf != &rf_stderr)
exit(1);
}
}
+
+/*
+ * Setting BIRD name
+ */
+
+static inline char *
+get_bird_name(char *s, char *def)
+{
+ char *t;
+ if (!s)
+ return def;
+ t = strrchr(s, '/');
+ if (!t)
+ return s;
+ if (!t[1])
+ return def;
+ return t+1;
+}
+
+void set_daemon_name(char *path, char *def)
+{
+ bird_name = get_bird_name(path, def);
+}
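/*
 * Illustrative sketch, not part of the patch: set_daemon_name() keeps only the
 * basename of the supplied path. The path below is hypothetical.
 */
static void
example_daemon_name(void)
{
  char path[] = "/usr/sbin/bird";
  set_daemon_name(path, "bird");	/* bird_name is now "bird" */
}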
+
+/*
+ * DNS resolver
+ */
+
+ip_addr
+resolve_hostname(const char *host, int type, const char **err_msg)
+{
+ struct addrinfo *res;
+ struct addrinfo hints = {
+ .ai_family = AF_UNSPEC,
+ .ai_socktype = (type == SK_UDP) ? SOCK_DGRAM : SOCK_STREAM,
+ .ai_flags = AI_ADDRCONFIG,
+ };
+
+ *err_msg = NULL;
+
+ int err_code = getaddrinfo(host, NULL, &hints, &res);
+ if (err_code != 0)
+ {
+ *err_msg = gai_strerror(err_code);
+ return IPA_NONE;
+ }
+
+ ip_addr addr = IPA_NONE;
+ uint unused;
+
+ sockaddr_read((sockaddr *) res->ai_addr, res->ai_family, &addr, NULL, &unused);
+ freeaddrinfo(res);
+
+ return addr;
+}
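/*
 * Illustrative sketch, not part of the patch: resolving a hostname the same
 * way the UDP log code does. The hostname is hypothetical.
 */
static void
example_resolve(void)
{
  const char *err_msg;
  ip_addr a = resolve_hostname("loghost.example.org", SK_UDP, &err_msg);

  if (ipa_zero(a))
    log(L_WARN "Cannot resolve hostname 'loghost.example.org': %s", err_msg);
}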
{
init_list(&c->logfiles);
- c->latency_limit = UNIX_DEFAULT_LATENCY_LIMIT;
- c->watchdog_warning = UNIX_DEFAULT_WATCHDOG_WARNING;
+ c->runtime.latency_limit = UNIX_DEFAULT_LATENCY_LIMIT;
+ c->runtime.watchdog_warning = UNIX_DEFAULT_WATCHDOG_WARNING;
+
+ alloc_preconfig(&c->runtime.alloc);
#ifdef PATH_IPROUTE_DIR
read_iproute_table(c, PATH_IPROUTE_DIR "/rt_protos", "ipp_", 255);
static void cli_commit(struct config *new, struct config *old);
void
-sysdep_commit(struct config *new, struct config *old)
+sysdep_commit(struct config *new, struct config *old UNUSED)
{
if (!new->shutdown)
+ {
log_switch(0, &new->logfiles, new->syslog_name);
-
- cli_commit(new, old);
- bird_thread_commit(new, old);
+ cli_commit(new, old);
+ bird_thread_commit(&new->threads);
+ }
}
static int
return 0;
}
-#define GLOBAL_CLI_DEBUG (atomic_load_explicit(&global_runtime, memory_order_relaxed)->cli_debug)
+#define GLOBAL_CLI_DEBUG (BIRD_GLOBAL_RUNTIME->cli_debug)
static void
cli_err(sock *s, int err)
static char *opt_list = "bc:dD:ps:P:u:g:flRh";
int parse_and_exit;
-char *bird_name;
static char *use_user;
static char *use_group;
static int run_in_foreground = 0;
exit(0);
}
-static inline char *
-get_bird_name(char *s, char *def)
-{
- char *t;
- if (!s)
- return def;
- t = strrchr(s, '/');
- if (!t)
- return s;
- if (!t[1])
- return def;
- return t+1;
-}
-
static inline uid_t
get_uid(const char *s)
{
int socket_changed = 0;
int c;
- bird_name = get_bird_name(argv[0], "bird");
+ set_daemon_name(argv[0], "bird");
+
if (argc == 2)
{
if (!strcmp(argv[1], "--version"))
--- /dev/null
+/*
+ * BIRD Internet Routing Daemon -- Unix I/O
+ *
+ * (c) 1998--2004 Martin Mares <mj@ucw.cz>
+ * (c) 2004 Ondrej Filip <feela@network.cz>
+ *
+ * Can be freely distributed and used under the terms of the GNU GPL.
+ */
+
+/* Unfortunately, some glibc versions hide parts of RFC 3542 API
+ if _GNU_SOURCE is not defined. */
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+#include <sys/mman.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/socket.h>
+#include <sys/uio.h>
+#include <sys/un.h>
+#include <poll.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <net/if.h>
+#include <netinet/in.h>
+#include <netinet/tcp.h>
+#include <netinet/udp.h>
+#include <netinet/icmp6.h>
+#include <netdb.h>
+
+#include "nest/bird.h"
+#include "lib/lists.h"
+#include "lib/resource.h"
+#include "lib/socket.h"
+#include "lib/event.h"
+#include "lib/locking.h"
+#include "lib/timer.h"
+#include "lib/string.h"
+#include "nest/iface.h"
+#include "conf/conf.h"
+
+#include "sysdep/unix/unix.h"
+#include "sysdep/unix/io-loop.h"
+#include CONFIG_INCLUDE_SYSIO_H
+
+/**
+ * DOC: Sockets
+ *
+ * Socket resources represent network connections. Their data structure (&socket)
+ * contains a lot of fields defining the exact type of the socket, the local and
+ * remote addresses and ports, pointers to socket buffers and finally pointers to
+ * hook functions to be called when new data have arrived to the receive buffer
+ * (@rx_hook), when the contents of the transmit buffer have been transmitted
+ * (@tx_hook) and when an error or connection close occurs (@err_hook).
+ *
+ * Freeing of sockets from inside socket hooks is perfectly safe.
+ */
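/*
 * Illustrative sketch, not part of the patch: the socket life cycle described
 * above. The hooks, port and buffer sizes are hypothetical; sk_new(), sk_open()
 * and the listed fields are the API used throughout this file.
 */
static int
example_rx(sock *s UNUSED, uint size UNUSED)
{
  return 1;	/* tell the loop the receive buffer was consumed */
}

static void
example_err(sock *s UNUSED, int err)
{
  log(L_ERR "Example socket error: %M", err);
}

static sock *
example_open_udp(pool *p, struct birdloop *loop)
{
  sock *s = sk_new(p);
  s->type = SK_UDP;
  s->subtype = SK_IPV4;
  s->sport = 4242;
  s->rbsize = s->tbsize = 1024;
  s->rx_hook = example_rx;
  s->err_hook = example_err;

  if (sk_open(s, loop) < 0)
  {
    log(L_ERR "Example socket: %s%#m", s->err);
    rfree(s);
    return NULL;
  }

  return s;
}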
+
+#ifndef SOL_IP
+#define SOL_IP IPPROTO_IP
+#endif
+
+#ifndef SOL_IPV6
+#define SOL_IPV6 IPPROTO_IPV6
+#endif
+
+#ifndef SOL_ICMPV6
+#define SOL_ICMPV6 IPPROTO_ICMPV6
+#endif
+
+
+/*
+ * Sockaddr helper functions
+ */
+
+static inline int UNUSED sockaddr_length(int af)
+{ return (af == AF_INET) ? sizeof(struct sockaddr_in) : sizeof(struct sockaddr_in6); }
+
+static inline void
+sockaddr_fill4(struct sockaddr_in *sa, ip_addr a, uint port)
+{
+ memset(sa, 0, sizeof(struct sockaddr_in));
+#ifdef HAVE_STRUCT_SOCKADDR_SA_LEN
+ sa->sin_len = sizeof(struct sockaddr_in);
+#endif
+ sa->sin_family = AF_INET;
+ sa->sin_port = htons(port);
+ sa->sin_addr = ipa_to_in4(a);
+}
+
+static inline void
+sockaddr_fill6(struct sockaddr_in6 *sa, ip_addr a, struct iface *ifa, uint port)
+{
+ memset(sa, 0, sizeof(struct sockaddr_in6));
+#ifdef SIN6_LEN
+ sa->sin6_len = sizeof(struct sockaddr_in6);
+#endif
+ sa->sin6_family = AF_INET6;
+ sa->sin6_port = htons(port);
+ sa->sin6_flowinfo = 0;
+ sa->sin6_addr = ipa_to_in6(a);
+
+ if (ifa && ipa_is_link_local(a))
+ sa->sin6_scope_id = ifa->index;
+}
+
+void
+sockaddr_fill(sockaddr *sa, int af, ip_addr a, struct iface *ifa, uint port)
+{
+ if (af == AF_INET)
+ sockaddr_fill4((struct sockaddr_in *) sa, a, port);
+ else if (af == AF_INET6)
+ sockaddr_fill6((struct sockaddr_in6 *) sa, a, ifa, port);
+ else
+ bug("Unknown AF");
+}
+
+static inline void
+sockaddr_read4(struct sockaddr_in *sa, ip_addr *a, uint *port)
+{
+ *port = ntohs(sa->sin_port);
+ *a = ipa_from_in4(sa->sin_addr);
+}
+
+static inline void
+sockaddr_read6(struct sockaddr_in6 *sa, ip_addr *a, struct iface **ifa, uint *port)
+{
+ *port = ntohs(sa->sin6_port);
+ *a = ipa_from_in6(sa->sin6_addr);
+
+ if (ifa && ipa_is_link_local(*a))
+ *ifa = if_find_by_index(sa->sin6_scope_id);
+}
+
+int
+sockaddr_read(sockaddr *sa, int af, ip_addr *a, struct iface **ifa, uint *port)
+{
+ if (sa->sa.sa_family != af)
+ goto fail;
+
+ if (af == AF_INET)
+ sockaddr_read4((struct sockaddr_in *) sa, a, port);
+ else if (af == AF_INET6)
+ sockaddr_read6((struct sockaddr_in6 *) sa, a, ifa, port);
+ else
+ goto fail;
+
+ return 0;
+
+ fail:
+ *a = IPA_NONE;
+ *port = 0;
+ return -1;
+}
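/*
 * Illustrative sketch, not part of the patch: a fill/read round trip through
 * the helpers above. The address and port are hypothetical.
 */
static void
example_sockaddr_roundtrip(void)
{
  sockaddr sa;
  ip_addr a;
  uint port;

  sockaddr_fill(&sa, AF_INET, ipa_from_ip4(ip4_from_u32(0x0a000001)), NULL, 179);

  if (sockaddr_read(&sa, AF_INET, &a, NULL, &port) == 0)
    log(L_TRACE "Example sockaddr: %I port %u", a, port);	/* 10.0.0.1 port 179 */
}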
+
+
+/*
+ * IPv6 multicast syscalls
+ */
+
+/* Fortunately standardized in RFC 3493 */
+
+#define INIT_MREQ6(maddr,ifa) \
+ { .ipv6mr_multiaddr = ipa_to_in6(maddr), .ipv6mr_interface = ifa->index }
+
+static inline int
+sk_setup_multicast6(sock *s)
+{
+ int index = s->iface->index;
+ int ttl = s->ttl;
+ int n = 0;
+
+ if (setsockopt(s->fd, SOL_IPV6, IPV6_MULTICAST_IF, &index, sizeof(index)) < 0)
+ ERR("IPV6_MULTICAST_IF");
+
+ if (setsockopt(s->fd, SOL_IPV6, IPV6_MULTICAST_HOPS, &ttl, sizeof(ttl)) < 0)
+ ERR("IPV6_MULTICAST_HOPS");
+
+ if (setsockopt(s->fd, SOL_IPV6, IPV6_MULTICAST_LOOP, &n, sizeof(n)) < 0)
+ ERR("IPV6_MULTICAST_LOOP");
+
+ return 0;
+}
+
+static inline int
+sk_join_group6(sock *s, ip_addr maddr)
+{
+ struct ipv6_mreq mr = INIT_MREQ6(maddr, s->iface);
+
+ if (setsockopt(s->fd, SOL_IPV6, IPV6_JOIN_GROUP, &mr, sizeof(mr)) < 0)
+ ERR("IPV6_JOIN_GROUP");
+
+ return 0;
+}
+
+static inline int
+sk_leave_group6(sock *s, ip_addr maddr)
+{
+ struct ipv6_mreq mr = INIT_MREQ6(maddr, s->iface);
+
+ if (setsockopt(s->fd, SOL_IPV6, IPV6_LEAVE_GROUP, &mr, sizeof(mr)) < 0)
+ ERR("IPV6_LEAVE_GROUP");
+
+ return 0;
+}
+
+
+/*
+ * IPv6 packet control messages
+ */
+
+/* Also standardized, in RFC 3542 */
+
+/*
+ * RFC 2292 uses IPV6_PKTINFO for both the socket option and the cmsg
+ * type, RFC 3542 changed the socket option to IPV6_RECVPKTINFO. If we
+ * don't have IPV6_RECVPKTINFO we suppose the OS implements the older
+ * RFC and we use IPV6_PKTINFO.
+ */
+#ifndef IPV6_RECVPKTINFO
+#define IPV6_RECVPKTINFO IPV6_PKTINFO
+#endif
+/*
+ * Same goes for IPV6_HOPLIMIT -> IPV6_RECVHOPLIMIT.
+ */
+#ifndef IPV6_RECVHOPLIMIT
+#define IPV6_RECVHOPLIMIT IPV6_HOPLIMIT
+#endif
+
+
+#define CMSG6_SPACE_PKTINFO CMSG_SPACE(sizeof(struct in6_pktinfo))
+#define CMSG6_SPACE_TTL CMSG_SPACE(sizeof(int))
+
+static inline int
+sk_request_cmsg6_pktinfo(sock *s)
+{
+ int y = 1;
+
+ if (setsockopt(s->fd, SOL_IPV6, IPV6_RECVPKTINFO, &y, sizeof(y)) < 0)
+ ERR("IPV6_RECVPKTINFO");
+
+ return 0;
+}
+
+static inline int
+sk_request_cmsg6_ttl(sock *s)
+{
+ int y = 1;
+
+ if (setsockopt(s->fd, SOL_IPV6, IPV6_RECVHOPLIMIT, &y, sizeof(y)) < 0)
+ ERR("IPV6_RECVHOPLIMIT");
+
+ return 0;
+}
+
+static inline void
+sk_process_cmsg6_pktinfo(sock *s, struct cmsghdr *cm)
+{
+ if (cm->cmsg_type == IPV6_PKTINFO)
+ {
+ struct in6_pktinfo *pi = (struct in6_pktinfo *) CMSG_DATA(cm);
+ s->laddr = ipa_from_in6(pi->ipi6_addr);
+ s->lifindex = pi->ipi6_ifindex;
+ }
+}
+
+static inline void
+sk_process_cmsg6_ttl(sock *s, struct cmsghdr *cm)
+{
+ if (cm->cmsg_type == IPV6_HOPLIMIT)
+ s->rcv_ttl = * (int *) CMSG_DATA(cm);
+}
+
+static inline void
+sk_prepare_cmsgs6(sock *s, struct msghdr *msg, void *cbuf, size_t cbuflen)
+{
+ struct cmsghdr *cm;
+ struct in6_pktinfo *pi;
+ int controllen = 0;
+
+ msg->msg_control = cbuf;
+ msg->msg_controllen = cbuflen;
+
+ cm = CMSG_FIRSTHDR(msg);
+ cm->cmsg_level = SOL_IPV6;
+ cm->cmsg_type = IPV6_PKTINFO;
+ cm->cmsg_len = CMSG_LEN(sizeof(*pi));
+ controllen += CMSG_SPACE(sizeof(*pi));
+
+ pi = (struct in6_pktinfo *) CMSG_DATA(cm);
+ pi->ipi6_ifindex = s->iface ? s->iface->index : 0;
+ pi->ipi6_addr = ipa_to_in6(s->saddr);
+
+ msg->msg_controllen = controllen;
+}
+
+
+/*
+ * Miscellaneous socket syscalls
+ */
+
+static inline int
+sk_set_ttl4(sock *s, int ttl)
+{
+ if (setsockopt(s->fd, SOL_IP, IP_TTL, &ttl, sizeof(ttl)) < 0)
+ ERR("IP_TTL");
+
+ return 0;
+}
+
+static inline int
+sk_set_ttl6(sock *s, int ttl)
+{
+ if (setsockopt(s->fd, SOL_IPV6, IPV6_UNICAST_HOPS, &ttl, sizeof(ttl)) < 0)
+ ERR("IPV6_UNICAST_HOPS");
+
+ return 0;
+}
+
+static inline int
+sk_set_tos4(sock *s, int tos)
+{
+ if (setsockopt(s->fd, SOL_IP, IP_TOS, &tos, sizeof(tos)) < 0)
+ ERR("IP_TOS");
+
+ return 0;
+}
+
+static inline int
+sk_set_tos6(sock *s, int tos)
+{
+ if (setsockopt(s->fd, SOL_IPV6, IPV6_TCLASS, &tos, sizeof(tos)) < 0)
+ ERR("IPV6_TCLASS");
+
+ return 0;
+}
+
+static inline int
+sk_set_high_port(sock *s UNUSED)
+{
+ /* Port range setting is optional, ignore it if not supported */
+
+#ifdef IP_PORTRANGE
+ if (sk_is_ipv4(s))
+ {
+ int range = IP_PORTRANGE_HIGH;
+ if (setsockopt(s->fd, SOL_IP, IP_PORTRANGE, &range, sizeof(range)) < 0)
+ ERR("IP_PORTRANGE");
+ }
+#endif
+
+#ifdef IPV6_PORTRANGE
+ if (sk_is_ipv6(s))
+ {
+ int range = IPV6_PORTRANGE_HIGH;
+ if (setsockopt(s->fd, SOL_IPV6, IPV6_PORTRANGE, &range, sizeof(range)) < 0)
+ ERR("IPV6_PORTRANGE");
+ }
+#endif
+
+ return 0;
+}
+
+static inline int
+sk_set_min_rcvbuf_(sock *s, int bufsize)
+{
+ int oldsize = 0, oldsize_s = sizeof(oldsize);
+
+ if (getsockopt(s->fd, SOL_SOCKET, SO_RCVBUF, &oldsize, &oldsize_s) < 0)
+ ERR("SO_RCVBUF");
+
+ if (oldsize >= bufsize)
+ return 0;
+
+ bufsize = BIRD_ALIGN(bufsize, 64);
+ if (setsockopt(s->fd, SOL_SOCKET, SO_RCVBUF, &bufsize, sizeof(bufsize)) < 0)
+ ERR("SO_RCVBUF");
+
+ /*
+ int newsize = 0, newsize_s = sizeof(newsize);
+ if (getsockopt(s->fd, SOL_SOCKET, SO_RCVBUF, &newsize, &newsize_s) < 0)
+ ERR("SO_RCVBUF");
+
+ log(L_INFO "Setting rcvbuf on %s from %d to %d",
+ s->iface ? s->iface->name : "*", oldsize, newsize);
+ */
+
+ return 0;
+}
+
+static void
+sk_set_min_rcvbuf(sock *s, int bufsize)
+{
+ if (sk_set_min_rcvbuf_(s, bufsize) < 0)
+ log(L_WARN "Socket error: %s%#m", s->err);
+}
+
+static inline byte *
+sk_skip_ip_header(byte *pkt, int *len)
+{
+ if ((*len < 20) || ((*pkt & 0xf0) != 0x40))
+ return NULL;
+
+ int hlen = (*pkt & 0x0f) * 4;
+ if ((hlen < 20) || (hlen > *len))
+ return NULL;
+
+ *len -= hlen;
+ return pkt + hlen;
+}
+
+byte *
+sk_rx_buffer(sock *s, int *len)
+{
+ if (sk_is_ipv4(s) && (s->type == SK_IP))
+ return sk_skip_ip_header(s->rbuf, len);
+ else
+ return s->rbuf;
+}
+
+
+/*
+ * Public socket functions
+ */
+
+/**
+ * sk_setup_multicast - enable multicast for given socket
+ * @s: socket
+ *
+ * Prepare transmission of multicast packets for given datagram socket.
+ * The socket must have defined @iface.
+ *
+ * Result: 0 for success, -1 for an error.
+ */
+
+int
+sk_setup_multicast(sock *s)
+{
+ ASSERT(s->iface);
+
+ if (sk_is_ipv4(s))
+ return sk_setup_multicast4(s);
+ else
+ return sk_setup_multicast6(s);
+}
+
+/**
+ * sk_join_group - join multicast group for given socket
+ * @s: socket
+ * @maddr: multicast address
+ *
+ * Join multicast group for given datagram socket and associated interface.
+ * The socket must have defined @iface.
+ *
+ * Result: 0 for success, -1 for an error.
+ */
+
+int
+sk_join_group(sock *s, ip_addr maddr)
+{
+ if (sk_is_ipv4(s))
+ return sk_join_group4(s, maddr);
+ else
+ return sk_join_group6(s, maddr);
+}
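+
+/* Usage sketch (illustrative only): a protocol typically enables multicast and
+ * then joins its group on an already opened datagram socket with a defined
+ * @iface; maddr is a placeholder multicast address:
+ *
+ *   if (sk_setup_multicast(s) < 0 || sk_join_group(s, maddr) < 0)
+ *     sk_log_error(s, "proto");
+ */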
+
+/**
+ * sk_leave_group - leave multicast group for given socket
+ * @s: socket
+ * @maddr: multicast address
+ *
+ * Leave multicast group for given datagram socket and associated interface.
+ * The socket must have defined @iface.
+ *
+ * Result: 0 for success, -1 for an error.
+ */
+
+int
+sk_leave_group(sock *s, ip_addr maddr)
+{
+ if (sk_is_ipv4(s))
+ return sk_leave_group4(s, maddr);
+ else
+ return sk_leave_group6(s, maddr);
+}
+
+/**
+ * sk_setup_broadcast - enable broadcast for given socket
+ * @s: socket
+ *
+ * Allow reception and transmission of broadcast packets for given datagram
+ * socket. The socket must have defined @iface. For transmission, packets should
+ * be sent to the @brd address of @iface.
+ *
+ * Result: 0 for success, -1 for an error.
+ */
+
+int
+sk_setup_broadcast(sock *s)
+{
+ int y = 1;
+
+ if (setsockopt(s->fd, SOL_SOCKET, SO_BROADCAST, &y, sizeof(y)) < 0)
+ ERR("SO_BROADCAST");
+
+ return 0;
+}
+
+/**
+ * sk_set_ttl - set transmit TTL for given socket
+ * @s: socket
+ * @ttl: TTL value
+ *
+ * Set TTL for already opened connections when TTL was not set before. Useful
+ * for accepted connections when different ones should have different TTL.
+ *
+ * Result: 0 for success, -1 for an error.
+ */
+
+int
+sk_set_ttl(sock *s, int ttl)
+{
+ s->ttl = ttl;
+
+ if (sk_is_ipv4(s))
+ return sk_set_ttl4(s, ttl);
+ else
+ return sk_set_ttl6(s, ttl);
+}
+
+/**
+ * sk_set_min_ttl - set minimal accepted TTL for given socket
+ * @s: socket
+ * @ttl: TTL value
+ *
+ * Set minimal accepted TTL for given socket. Can be used for TTL security
+ * implementations.
+ *
+ * Result: 0 for success, -1 for an error.
+ */
+
+int
+sk_set_min_ttl(sock *s, int ttl)
+{
+ if (sk_is_ipv4(s))
+ return sk_set_min_ttl4(s, ttl);
+ else
+ return sk_set_min_ttl6(s, ttl);
+}
+
+#if 0
+/**
+ * sk_set_md5_auth - add / remove MD5 security association for given socket
+ * @s: socket
+ * @local: IP address of local side
+ * @remote: IP address of remote side
+ * @ifa: Interface for link-local IP address
+ * @passwd: Password used for MD5 authentication
+ * @setkey: Update also system SA/SP database
+ *
+ * In the TCP MD5 handling code in the kernel, there is a set of security
+ * associations used for choosing the password and other authentication
+ * parameters according to the local and remote address. This function is
+ * useful for listening sockets; for active sockets it may be enough to set
+ * the s->password field.
+ *
+ * When called with passwd != NULL, the new pair is added;
+ * when called with passwd == NULL, the existing pair is removed.
+ *
+ * Note that while on Linux the MD5 SAs are specific to the socket, on BSD they
+ * are stored in a global SA/SP database (but the behavior must also be enabled
+ * on a per-socket basis). In case of multiple sockets to the same neighbor,
+ * the socket-specific state must be configured for each socket, while the
+ * global state is set just once per src-dst pair. The @setkey argument
+ * controls whether the global state (SA/SP database) is also updated.
+ *
+ * Result: 0 for success, -1 for an error.
+ */
+
+int
+sk_set_md5_auth(sock *s, ip_addr local, ip_addr remote, struct iface *ifa, char *passwd, int setkey)
+{ DUMMY; }
+#endif
+
+/**
+ * sk_set_ipv6_checksum - specify IPv6 checksum offset for given socket
+ * @s: socket
+ * @offset: offset
+ *
+ * Specify IPv6 checksum field offset for given raw IPv6 socket. After that, the
+ * kernel will automatically fill it for outgoing packets and check it for
+ * incoming packets. Should not be used on ICMPv6 sockets, where the position is
+ * known to the kernel.
+ *
+ * Result: 0 for success, -1 for an error.
+ */
+
+int
+sk_set_ipv6_checksum(sock *s, int offset)
+{
+ if (setsockopt(s->fd, SOL_IPV6, IPV6_CHECKSUM, &offset, sizeof(offset)) < 0)
+ ERR("IPV6_CHECKSUM");
+
+ return 0;
+}
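+
+/* For example (an assumption for illustration, not taken from this change):
+ * a raw OSPFv3 socket would use offset 12, as the checksum field sits 12
+ * bytes into the OSPF packet header:
+ *
+ *   sk_set_ipv6_checksum(s, 12);
+ */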
+
+int
+sk_set_icmp6_filter(sock *s, int p1, int p2)
+{
+ /* A bit of a lame interface, but it is here only for RAdv */
+ struct icmp6_filter f;
+
+ ICMP6_FILTER_SETBLOCKALL(&f);
+ ICMP6_FILTER_SETPASS(p1, &f);
+ ICMP6_FILTER_SETPASS(p2, &f);
+
+ if (setsockopt(s->fd, SOL_ICMPV6, ICMP6_FILTER, &f, sizeof(f)) < 0)
+ ERR("ICMP6_FILTER");
+
+ return 0;
+}
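+
+/* Illustrative call (assumed, not part of this change): the RAdv use case
+ * mentioned above passes the two ICMPv6 types it is interested in, so the
+ * kernel drops everything else on this socket:
+ *
+ *   sk_set_icmp6_filter(s, ND_ROUTER_SOLICIT, ND_ROUTER_ADVERT);
+ */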
+
+void
+sk_log_error(sock *s, const char *p)
+{
+ log(L_ERR "%s: Socket error: %s%#m", p, s->err);
+}
+
+
+/*
+ * Actual struct birdsock code
+ */
+
+sock *
+sk_next(sock *s)
+{
+ if (!s->n.next->next)
+ return NULL;
+ else
+ return SKIP_BACK(sock, n, s->n.next);
+}
+
+static void
+sk_alloc_bufs(sock *s)
+{
+ if (!s->rbuf && s->rbsize)
+ s->rbuf = s->rbuf_alloc = xmalloc(s->rbsize);
+ s->rpos = s->rbuf;
+ if (!s->tbuf && s->tbsize)
+ s->tbuf = s->tbuf_alloc = xmalloc(s->tbsize);
+ s->tpos = s->ttx = s->tbuf;
+}
+
+static void
+sk_free_bufs(sock *s)
+{
+ if (s->rbuf_alloc)
+ {
+ xfree(s->rbuf_alloc);
+ s->rbuf = s->rbuf_alloc = NULL;
+ }
+ if (s->tbuf_alloc)
+ {
+ xfree(s->tbuf_alloc);
+ s->tbuf = s->tbuf_alloc = NULL;
+ }
+}
+
+#ifdef HAVE_LIBSSH
+static void
+sk_ssh_free(sock *s)
+{
+ struct ssh_sock *ssh = s->ssh;
+
+ if (s->ssh == NULL)
+ return;
+
+ s->ssh = NULL;
+
+ if (ssh->channel)
+ {
+ ssh_channel_close(ssh->channel);
+ ssh_channel_free(ssh->channel);
+ ssh->channel = NULL;
+ }
+
+ if (ssh->session)
+ {
+ ssh_disconnect(ssh->session);
+ ssh_free(ssh->session);
+ ssh->session = NULL;
+ }
+}
+#endif
+
+
+static void
+sk_free(resource *r)
+{
+ sock *s = (sock *) r;
+
+ sk_free_bufs(s);
+
+#ifdef HAVE_LIBSSH
+ if (s->type == SK_SSH || s->type == SK_SSH_ACTIVE)
+ sk_ssh_free(s);
+#endif
+
+ if (s->loop)
+ birdloop_remove_socket(s->loop, s);
+
+ if (s->fd >= 0 && s->type != SK_SSH && s->type != SK_SSH_ACTIVE)
+ close(s->fd);
+
+ s->fd = -1;
+}
+
+void
+sk_set_rbsize(sock *s, uint val)
+{
+ ASSERT(s->rbuf_alloc == s->rbuf);
+
+ if (s->rbsize == val)
+ return;
+
+ s->rbsize = val;
+ xfree(s->rbuf_alloc);
+ s->rbuf_alloc = xmalloc(val);
+ s->rpos = s->rbuf = s->rbuf_alloc;
+
+ if ((s->type == SK_UDP) || (s->type == SK_IP))
+ sk_set_min_rcvbuf(s, s->rbsize);
+}
+
+void
+sk_set_tbsize(sock *s, uint val)
+{
+ ASSERT(s->tbuf_alloc == s->tbuf);
+
+ if (s->tbsize == val)
+ return;
+
+ byte *old_tbuf = s->tbuf;
+
+ s->tbsize = val;
+ s->tbuf = s->tbuf_alloc = xrealloc(s->tbuf_alloc, val);
+ s->tpos = s->tbuf + (s->tpos - old_tbuf);
+ s->ttx = s->tbuf + (s->ttx - old_tbuf);
+}
+
+void
+sk_set_tbuf(sock *s, void *tbuf)
+{
+ s->tbuf = tbuf ?: s->tbuf_alloc;
+ s->ttx = s->tpos = s->tbuf;
+}
+
+void
+sk_reallocate(sock *s)
+{
+ sk_free_bufs(s);
+ sk_alloc_bufs(s);
+}
+
+static void
+sk_dump(struct dump_request *dreq, resource *r)
+{
+ sock *s = (sock *) r;
+ static char *sk_type_names[] = { "TCP<", "TCP>", "TCP", "UDP", NULL, "IP", NULL, "MAGIC", "UNIX<", "UNIX", "SSH>", "SSH", "DEL!" };
+
+ RDUMP("(%s, ud=%p, sa=%I, sp=%d, da=%I, dp=%d, tos=%d, ttl=%d, if=%s)\n",
+ sk_type_names[s->type],
+ s->data,
+ s->saddr,
+ s->sport,
+ s->daddr,
+ s->dport,
+ s->tos,
+ s->ttl,
+ s->iface ? s->iface->name : "none");
+}
+
+static struct resclass sk_class = {
+ "Socket",
+ sizeof(sock),
+ sk_free,
+ sk_dump,
+ NULL,
+ NULL
+};
+
+/**
+ * sk_new - create a socket
+ * @p: pool
+ *
+ * This function creates a new socket resource. If you want to use it,
+ * you need to fill in all the required fields of the structure and
+ * call sk_open() to do the actual opening of the socket.
+ *
+ * The real function name is sock_new(); sk_new() is a macro wrapper
+ * to avoid a collision with OpenSSL.
+ */
+sock *
+sock_new(pool *p)
+{
+ sock *s = ralloc(p, &sk_class);
+ s->pool = p;
+ // s->saddr = s->daddr = IPA_NONE;
+ s->tos = s->priority = s->ttl = -1;
+ s->fd = -1;
+ return s;
+}
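+
+/* A minimal creation sketch (illustrative assumption, not part of this change);
+ * my_rx, my_err, the pool p, the loop and the port are placeholders:
+ *
+ *   sock *s = sk_new(p);
+ *   s->type = SK_UDP;
+ *   s->sport = 10000;
+ *   s->rbsize = s->tbsize = 1500;
+ *   s->rx_hook = my_rx;
+ *   s->err_hook = my_err;
+ *   if (sk_open(s, loop) < 0)
+ *     sk_log_error(s, "example");
+ */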
+
+static int
+sk_setup(sock *s)
+{
+ int y = 1;
+ int fd = s->fd;
+
+ if (s->type == SK_SSH_ACTIVE)
+ return 0;
+
+ if (fcntl(fd, F_SETFL, O_NONBLOCK) < 0)
+ ERR("O_NONBLOCK");
+
+ if (!s->af)
+ return 0;
+
+ if (ipa_nonzero(s->saddr) && !(s->flags & SKF_BIND))
+ s->flags |= SKF_PKTINFO;
+
+#ifdef CONFIG_USE_HDRINCL
+ if (sk_is_ipv4(s) && (s->type == SK_IP) && (s->flags & SKF_PKTINFO))
+ {
+ s->flags &= ~SKF_PKTINFO;
+ s->flags |= SKF_HDRINCL;
+ if (setsockopt(fd, SOL_IP, IP_HDRINCL, &y, sizeof(y)) < 0)
+ ERR("IP_HDRINCL");
+ }
+#endif
+
+ if (s->vrf && (s->vrf != &default_vrf) && !s->iface && (s->type != SK_TCP))
+ {
+ /* Bind socket to associated VRF interface.
+ This is Linux-specific, but so is SO_BINDTODEVICE.
+ For accepted TCP sockets it is inherited from the listening one. */
+#ifdef SO_BINDTODEVICE
+ struct ifreq ifr = {};
+ strcpy(ifr.ifr_name, s->vrf->name);
+ if (setsockopt(s->fd, SOL_SOCKET, SO_BINDTODEVICE, &ifr, sizeof(ifr)) < 0)
+ ERR("SO_BINDTODEVICE");
+#endif
+ }
+
+ if (s->iface)
+ {
+#ifdef SO_BINDTODEVICE
+ struct ifreq ifr = {};
+ strcpy(ifr.ifr_name, s->iface->name);
+ if (setsockopt(s->fd, SOL_SOCKET, SO_BINDTODEVICE, &ifr, sizeof(ifr)) < 0)
+ ERR("SO_BINDTODEVICE");
+#endif
+
+#ifdef CONFIG_UNIX_DONTROUTE
+ if (setsockopt(s->fd, SOL_SOCKET, SO_DONTROUTE, &y, sizeof(y)) < 0)
+ ERR("SO_DONTROUTE");
+#endif
+ }
+
+ if (sk_is_ipv4(s))
+ {
+ if (s->flags & SKF_LADDR_RX)
+ if (sk_request_cmsg4_pktinfo(s) < 0)
+ return -1;
+
+ if (s->flags & SKF_TTL_RX)
+ if (sk_request_cmsg4_ttl(s) < 0)
+ return -1;
+
+ if ((s->type == SK_UDP) || (s->type == SK_IP))
+ if (sk_disable_mtu_disc4(s) < 0)
+ return -1;
+
+ if (s->ttl >= 0)
+ if (sk_set_ttl4(s, s->ttl) < 0)
+ return -1;
+
+ if (s->tos >= 0)
+ if (sk_set_tos4(s, s->tos) < 0)
+ return -1;
+ }
+
+ if (sk_is_ipv6(s))
+ {
+ if ((s->type == SK_TCP_PASSIVE) || (s->type == SK_TCP_ACTIVE) || (s->type == SK_UDP))
+ if (setsockopt(fd, SOL_IPV6, IPV6_V6ONLY, &y, sizeof(y)) < 0)
+ ERR("IPV6_V6ONLY");
+
+ if (s->flags & SKF_LADDR_RX)
+ if (sk_request_cmsg6_pktinfo(s) < 0)
+ return -1;
+
+ if (s->flags & SKF_TTL_RX)
+ if (sk_request_cmsg6_ttl(s) < 0)
+ return -1;
+
+ if ((s->type == SK_UDP) || (s->type == SK_IP))
+ if (sk_disable_mtu_disc6(s) < 0)
+ return -1;
+
+ if (s->ttl >= 0)
+ if (sk_set_ttl6(s, s->ttl) < 0)
+ return -1;
+
+ if (s->tos >= 0)
+ if (sk_set_tos6(s, s->tos) < 0)
+ return -1;
+
+ if ((s->flags & SKF_UDP6_NO_CSUM_RX) && (s->type == SK_UDP))
+ if (sk_set_udp6_no_csum_rx(s) < 0)
+ return -1;
+ }
+
+ /* Must be after sk_set_tos4() as setting ToS on Linux also mangles priority */
+ if (s->priority >= 0)
+ if (sk_set_priority(s, s->priority) < 0)
+ return -1;
+
+ if ((s->type == SK_UDP) || (s->type == SK_IP))
+ sk_set_min_rcvbuf(s, s->rbsize);
+
+ return 0;
+}
+
+static void
+sk_err_hook(sock *s, int e)
+{
+ if (s->rx_paused)
+ CALL(s->err_paused, s, e);
+
+ s->err_hook(s, e);
+}
+
+static int
+sk_connect(sock *s)
+{
+ sockaddr sa;
+ sockaddr_fill(&sa, s->af, s->daddr, s->iface, s->dport);
+ return connect(s->fd, &sa.sa, SA_LEN(sa));
+}
+
+static void
+sk_tcp_connected(sock *s)
+{
+ sockaddr sa;
+ int sa_len = sizeof(sa);
+
+ if ((getsockname(s->fd, &sa.sa, &sa_len) < 0) ||
+ (sockaddr_read(&sa, s->af, &s->saddr, &s->iface, &s->sport) < 0))
+ log(L_WARN "SOCK: Cannot get local IP address for TCP>");
+
+ s->type = SK_TCP;
+ sk_alloc_bufs(s);
+ s->tx_hook(s);
+}
+
+#ifdef HAVE_LIBSSH
+static void
+sk_ssh_connected(sock *s)
+{
+ sk_alloc_bufs(s);
+ s->type = SK_SSH;
+ s->tx_hook(s);
+}
+#endif
+
+static int
+sk_passive_connected(sock *s, int type)
+{
+ sockaddr loc_sa, rem_sa;
+ int loc_sa_len = sizeof(loc_sa);
+ int rem_sa_len = sizeof(rem_sa);
+
+ int fd = accept(s->fd, ((type == SK_TCP) ? &rem_sa.sa : NULL), &rem_sa_len);
+ if (fd < 0)
+ {
+ if ((errno != EINTR) && (errno != EAGAIN))
+ sk_err_hook(s, errno);
+ return 0;
+ }
+
+ struct domain_generic *sock_lock = DG_IS_LOCKED(s->pool->domain) ? NULL : s->pool->domain;
+ if (sock_lock)
+ DG_LOCK(sock_lock);
+
+ sock *t = sk_new(s->pool);
+ t->type = type;
+ t->data = s->data;
+ t->af = s->af;
+ t->fd = fd;
+ t->ttl = s->ttl;
+ t->tos = s->tos;
+ t->vrf = s->vrf;
+ t->rbsize = s->rbsize;
+ t->tbsize = s->tbsize;
+
+ if (type == SK_TCP)
+ {
+ if ((getsockname(fd, &loc_sa.sa, &loc_sa_len) < 0) ||
+ (sockaddr_read(&loc_sa, s->af, &t->saddr, &t->iface, &t->sport) < 0))
+ log(L_WARN "SOCK: Cannot get local IP address for TCP<");
+
+ if (sockaddr_read(&rem_sa, s->af, &t->daddr, &t->iface, &t->dport) < 0)
+ log(L_WARN "SOCK: Cannot get remote IP address for TCP<");
+ }
+
+ if (sk_setup(t) < 0)
+ {
+ /* FIXME: Call err_hook instead ? */
+ log(L_ERR "SOCK: Incoming connection: %s%#m", t->err);
+
+ /* FIXME: handle it better in rfree() */
+ close(t->fd);
+ t->fd = -1;
+ sk_close(t);
+ t = NULL;
+ }
+ else
+ {
+ birdloop_add_socket(s->loop, t);
+ sk_alloc_bufs(t);
+ }
+
+ if (sock_lock)
+ DG_UNLOCK(sock_lock);
+
+ if (t)
+ s->rx_hook(t, 0);
+
+ return 1;
+}
+
+#ifdef HAVE_LIBSSH
+/*
+ * Return SSH_OK or SSH_AGAIN or SSH_ERROR
+ */
+static int
+sk_ssh_connect(sock *s)
+{
+ s->fd = ssh_get_fd(s->ssh->session);
+
+ /* Big fall thru automata */
+ switch (s->ssh->state)
+ {
+ case SK_SSH_CONNECT:
+ {
+ switch (ssh_connect(s->ssh->session))
+ {
+ case SSH_AGAIN:
+ /* A quick look into libSSH shows that ssh_get_fd() should return non-(-1)
+ * after SSH_AGAIN is returned by ssh_connect(). This is, however, not
+ * documented anywhere, but our code relies on it.
+ */
+ return SSH_AGAIN;
+
+ case SSH_OK:
+ break;
+
+ default:
+ return SSH_ERROR;
+ }
+ } /* fallthrough */
+
+ case SK_SSH_SERVER_KNOWN:
+ {
+ s->ssh->state = SK_SSH_SERVER_KNOWN;
+
+ if (s->ssh->server_hostkey_path)
+ {
+ int server_identity_is_ok = 1;
+
+#ifdef HAVE_SSH_OLD_SERVER_VALIDATION_API
+#define ssh_session_is_known_server ssh_is_server_known
+#define SSH_KNOWN_HOSTS_OK SSH_SERVER_KNOWN_OK
+#define SSH_KNOWN_HOSTS_UNKNOWN SSH_SERVER_NOT_KNOWN
+#define SSH_KNOWN_HOSTS_CHANGED SSH_SERVER_KNOWN_CHANGED
+#define SSH_KNOWN_HOSTS_NOT_FOUND SSH_SERVER_FILE_NOT_FOUND
+#define SSH_KNOWN_HOSTS_ERROR SSH_SERVER_ERROR
+#define SSH_KNOWN_HOSTS_OTHER SSH_SERVER_FOUND_OTHER
+#endif
+
+ /* Check server identity */
+ switch (ssh_session_is_known_server(s->ssh->session))
+ {
+#define LOG_WARN_ABOUT_SSH_SERVER_VALIDATION(s,msg,args...) log(L_WARN "SSH Identity %s@%s:%u: " msg, (s)->ssh->username, (s)->host, (s)->dport, ## args);
+ case SSH_KNOWN_HOSTS_OK:
+ /* The server is known and has not changed. */
+ break;
+
+ case SSH_KNOWN_HOSTS_UNKNOWN:
+ LOG_WARN_ABOUT_SSH_SERVER_VALIDATION(s, "The server is unknown, its public key was not found in the known host file %s", s->ssh->server_hostkey_path);
+ server_identity_is_ok = 0;
+ break;
+
+ case SSH_KNOWN_HOSTS_CHANGED:
+ LOG_WARN_ABOUT_SSH_SERVER_VALIDATION(s, "The server key has changed. Either you are under attack or the administrator changed the key.");
+ server_identity_is_ok = 0;
+ break;
+
+ case SSH_KNOWN_HOSTS_NOT_FOUND:
+ LOG_WARN_ABOUT_SSH_SERVER_VALIDATION(s, "The known host file %s does not exist", s->ssh->server_hostkey_path);
+ server_identity_is_ok = 0;
+ break;
+
+ case SSH_KNOWN_HOSTS_ERROR:
+ LOG_WARN_ABOUT_SSH_SERVER_VALIDATION(s, "Some error happened");
+ server_identity_is_ok = 0;
+ break;
+
+ case SSH_KNOWN_HOSTS_OTHER:
+ LOG_WARN_ABOUT_SSH_SERVER_VALIDATION(s, "The server gave use a key of a type while we had another type recorded. " \
+ "It is a possible attack.");
+ server_identity_is_ok = 0;
+ break;
+ }
+
+ if (!server_identity_is_ok)
+ return SSH_ERROR;
+ }
+ } /* fallthrough */
+
+ case SK_SSH_USERAUTH:
+ {
+ s->ssh->state = SK_SSH_USERAUTH;
+ switch (ssh_userauth_publickey_auto(s->ssh->session, NULL, NULL))
+ {
+ case SSH_AUTH_AGAIN:
+ return SSH_AGAIN;
+
+ case SSH_AUTH_SUCCESS:
+ break;
+
+ default:
+ return SSH_ERROR;
+ }
+ } /* fallthrough */
+
+ case SK_SSH_CHANNEL:
+ {
+ s->ssh->state = SK_SSH_CHANNEL;
+ s->ssh->channel = ssh_channel_new(s->ssh->session);
+ if (s->ssh->channel == NULL)
+ return SSH_ERROR;
+ } /* fallthrough */
+
+ case SK_SSH_SESSION:
+ {
+ s->ssh->state = SK_SSH_SESSION;
+ switch (ssh_channel_open_session(s->ssh->channel))
+ {
+ case SSH_AGAIN:
+ return SSH_AGAIN;
+
+ case SSH_OK:
+ break;
+
+ default:
+ return SSH_ERROR;
+ }
+ } /* fallthrough */
+
+ case SK_SSH_SUBSYSTEM:
+ {
+ s->ssh->state = SK_SSH_SUBSYSTEM;
+ if (s->ssh->subsystem)
+ {
+ switch (ssh_channel_request_subsystem(s->ssh->channel, s->ssh->subsystem))
+ {
+ case SSH_AGAIN:
+ return SSH_AGAIN;
+
+ case SSH_OK:
+ break;
+
+ default:
+ return SSH_ERROR;
+ }
+ }
+ } /* fallthrough */
+
+ case SK_SSH_ESTABLISHED:
+ s->ssh->state = SK_SSH_ESTABLISHED;
+ }
+
+ return SSH_OK;
+}
+
+/*
+ * Return file descriptor number if success
+ * Return -1 if failed
+ */
+static int
+sk_open_ssh(sock *s)
+{
+ if (!s->ssh)
+ bug("sk_open() sock->ssh is not allocated");
+
+ ssh_session sess = ssh_new();
+ if (sess == NULL)
+ ERR2("Cannot create a ssh session");
+ s->ssh->session = sess;
+
+ const int verbosity = SSH_LOG_NOLOG;
+ ssh_options_set(sess, SSH_OPTIONS_LOG_VERBOSITY, &verbosity);
+ ssh_options_set(sess, SSH_OPTIONS_HOST, s->host);
+ ssh_options_set(sess, SSH_OPTIONS_PORT, &(s->dport));
+ /* TODO: Add SSH_OPTIONS_BINDADDR */
+ ssh_options_set(sess, SSH_OPTIONS_USER, s->ssh->username);
+
+ if (s->ssh->server_hostkey_path)
+ ssh_options_set(sess, SSH_OPTIONS_KNOWNHOSTS, s->ssh->server_hostkey_path);
+
+ if (s->ssh->client_privkey_path)
+ ssh_options_set(sess, SSH_OPTIONS_IDENTITY, s->ssh->client_privkey_path);
+
+ ssh_set_blocking(sess, 0);
+
+ switch (sk_ssh_connect(s))
+ {
+ case SSH_AGAIN:
+ break;
+
+ case SSH_OK:
+ sk_ssh_connected(s);
+ break;
+
+ case SSH_ERROR:
+ ERR2(ssh_get_error(sess));
+ break;
+ }
+
+ return ssh_get_fd(sess);
+
+ err:
+ return -1;
+}
+#endif
+
+/**
+ * sk_open - open a socket
+ * @s: socket
+ * @loop: loop to attach the socket to
+ *
+ * This function takes a socket resource created by sk_new() and
+ * initialized by the user and binds a corresponding network connection
+ * to it.
+ *
+ * Result: 0 for success, -1 for an error.
+ */
+int
+sk_open(sock *s, struct birdloop *loop)
+{
+ int af = AF_UNSPEC;
+ int fd = -1;
+ int do_bind = 0;
+ int bind_port = 0;
+ ip_addr bind_addr = IPA_NONE;
+ sockaddr sa;
+
+ if (s->type <= SK_IP)
+ {
+ /*
+ * For TCP/IP sockets, the address family (IPv4 or IPv6) can be specified either
+ * explicitly (SK_IPV4 or SK_IPV6) or implicitly (based on saddr, daddr).
+ * But the specifications have to be consistent.
+ */
+
+ switch (s->subtype)
+ {
+ case 0:
+ ASSERT(ipa_zero(s->saddr) || ipa_zero(s->daddr) ||
+ (ipa_is_ip4(s->saddr) == ipa_is_ip4(s->daddr)));
+ af = (ipa_is_ip4(s->saddr) || ipa_is_ip4(s->daddr)) ? AF_INET : AF_INET6;
+ break;
+
+ case SK_IPV4:
+ ASSERT(ipa_zero(s->saddr) || ipa_is_ip4(s->saddr));
+ ASSERT(ipa_zero(s->daddr) || ipa_is_ip4(s->daddr));
+ af = AF_INET;
+ break;
+
+ case SK_IPV6:
+ ASSERT(ipa_zero(s->saddr) || !ipa_is_ip4(s->saddr));
+ ASSERT(ipa_zero(s->daddr) || !ipa_is_ip4(s->daddr));
+ af = AF_INET6;
+ break;
+
+ default:
+ bug("Invalid subtype %d", s->subtype);
+ }
+ }
+
+ switch (s->type)
+ {
+ case SK_TCP_ACTIVE:
+ s->ttx = ""; /* Force s->ttx != s->tpos */
+ /* Fall thru */
+ case SK_TCP_PASSIVE:
+ fd = socket(af, SOCK_STREAM, IPPROTO_TCP);
+ bind_port = s->sport;
+ bind_addr = s->saddr;
+ do_bind = bind_port || ipa_nonzero(bind_addr);
+ break;
+
+#ifdef HAVE_LIBSSH
+ case SK_SSH_ACTIVE:
+ s->ttx = ""; /* Force s->ttx != s->tpos */
+ fd = sk_open_ssh(s);
+ break;
+#endif
+
+ case SK_UDP:
+ fd = socket(af, SOCK_DGRAM, IPPROTO_UDP);
+ bind_port = s->sport;
+ bind_addr = (s->flags & SKF_BIND) ? s->saddr : IPA_NONE;
+ do_bind = 1;
+ break;
+
+ case SK_IP:
+ fd = socket(af, SOCK_RAW, s->dport);
+ bind_port = 0;
+ bind_addr = (s->flags & SKF_BIND) ? s->saddr : IPA_NONE;
+ do_bind = ipa_nonzero(bind_addr);
+ break;
+
+ case SK_MAGIC:
+ af = 0;
+ fd = s->fd;
+ break;
+
+ default:
+ bug("sk_open() called for invalid sock type %d", s->type);
+ }
+
+ if (fd < 0)
+ ERR("socket");
+
+ s->af = af;
+ s->fd = fd;
+
+ if (sk_setup(s) < 0)
+ goto err;
+
+ if (do_bind)
+ {
+ if (bind_port)
+ {
+ int y = 1;
+
+ if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &y, sizeof(y)) < 0)
+ ERR2("SO_REUSEADDR");
+
+#ifdef CONFIG_NO_IFACE_BIND
+ /* Workaround missing ability to bind to an iface */
+ if ((s->type == SK_UDP) && s->iface && ipa_zero(bind_addr))
+ {
+ if (setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &y, sizeof(y)) < 0)
+ ERR2("SO_REUSEPORT");
+ }
+#endif
+ }
+ else
+ if (s->flags & SKF_HIGH_PORT)
+ if (sk_set_high_port(s) < 0)
+ log(L_WARN "Socket error: %s%#m", s->err);
+
+ if (s->flags & SKF_FREEBIND)
+ if (sk_set_freebind(s) < 0)
+ log(L_WARN "Socket error: %s%#m", s->err);
+
+ sockaddr_fill(&sa, s->af, bind_addr, s->iface, bind_port);
+ if (bind(fd, &sa.sa, SA_LEN(sa)) < 0)
+ ERR2("bind");
+ }
+
+ if (s->password)
+ if (sk_set_md5_auth(s, s->saddr, s->daddr, -1, s->iface, s->password, 0) < 0)
+ goto err;
+
+ switch (s->type)
+ {
+ case SK_TCP_ACTIVE:
+ if (sk_connect(s) >= 0)
+ sk_tcp_connected(s);
+ else if (errno != EINTR && errno != EAGAIN && errno != EINPROGRESS &&
+ errno != ECONNREFUSED && errno != EHOSTUNREACH && errno != ENETUNREACH)
+ ERR2("connect");
+ break;
+
+ case SK_TCP_PASSIVE:
+ if (listen(fd, 8) < 0)
+ ERR2("listen");
+ break;
+
+ case SK_UDP:
+ if (s->flags & SKF_CONNECT)
+ if (sk_connect(s) < 0)
+ ERR2("connect");
+
+ sk_alloc_bufs(s);
+ break;
+
+ case SK_SSH_ACTIVE:
+ case SK_MAGIC:
+ break;
+
+ default:
+ sk_alloc_bufs(s);
+ }
+
+ birdloop_add_socket(loop, s);
+ return 0;
+
+err:
+ close(fd);
+ s->fd = -1;
+ return -1;
+}
+
+int
+sk_open_unix(sock *s, struct birdloop *loop, const char *name)
+{
+ struct sockaddr_un sa;
+ int fd;
+
+ /* We are sloppy during errors (we leak the fd and do not set s->err), but we die anyway */
+
+ fd = socket(AF_UNIX, SOCK_STREAM, 0);
+ if (fd < 0)
+ return -1;
+
+ if (fcntl(fd, F_SETFL, O_NONBLOCK) < 0)
+ return -1;
+
+ /* Path length is checked in test_old_bird(), but we may need unix sockets for other reasons in the future */
+ ASSERT_DIE(strlen(name) < sizeof(sa.sun_path));
+
+ sa.sun_family = AF_UNIX;
+ strcpy(sa.sun_path, name);
+
+ if (bind(fd, (struct sockaddr *) &sa, SUN_LEN(&sa)) < 0)
+ return -1;
+
+ if (listen(fd, 8) < 0)
+ return -1;
+
+ s->fd = fd;
+ birdloop_add_socket(loop, s);
+ return 0;
+}
+
+
+#define CMSG_RX_SPACE MAX(CMSG4_SPACE_PKTINFO+CMSG4_SPACE_TTL, \
+ CMSG6_SPACE_PKTINFO+CMSG6_SPACE_TTL)
+#define CMSG_TX_SPACE MAX(CMSG4_SPACE_PKTINFO,CMSG6_SPACE_PKTINFO)
+
+static void
+sk_prepare_cmsgs(sock *s, struct msghdr *msg, void *cbuf, size_t cbuflen)
+{
+ if (sk_is_ipv4(s))
+ sk_prepare_cmsgs4(s, msg, cbuf, cbuflen);
+ else
+ sk_prepare_cmsgs6(s, msg, cbuf, cbuflen);
+}
+
+static void
+sk_process_cmsgs(sock *s, struct msghdr *msg)
+{
+ struct cmsghdr *cm;
+
+ s->laddr = IPA_NONE;
+ s->lifindex = 0;
+ s->rcv_ttl = -1;
+
+ for (cm = CMSG_FIRSTHDR(msg); cm != NULL; cm = CMSG_NXTHDR(msg, cm))
+ {
+ if ((cm->cmsg_level == SOL_IP) && sk_is_ipv4(s))
+ {
+ sk_process_cmsg4_pktinfo(s, cm);
+ sk_process_cmsg4_ttl(s, cm);
+ }
+
+ if ((cm->cmsg_level == SOL_IPV6) && sk_is_ipv6(s))
+ {
+ sk_process_cmsg6_pktinfo(s, cm);
+ sk_process_cmsg6_ttl(s, cm);
+ }
+ }
+}
+
+
+static inline int
+sk_sendmsg(sock *s)
+{
+ struct iovec iov = {s->tbuf, s->tpos - s->tbuf};
+ byte cmsg_buf[CMSG_TX_SPACE];
+ sockaddr dst;
+ int flags = 0;
+
+ sockaddr_fill(&dst, s->af, s->daddr, s->iface, s->dport);
+
+ struct msghdr msg = {
+ .msg_name = &dst.sa,
+ .msg_namelen = SA_LEN(dst),
+ .msg_iov = &iov,
+ .msg_iovlen = 1
+ };
+
+#ifdef CONFIG_DONTROUTE_UNICAST
+ /* FreeBSD silently changes TTL to 1 when MSG_DONTROUTE is used, therefore we
+ cannot use it for other cases (e.g. when TTL security is used). */
+ if (ipa_is_ip4(s->daddr) && ip4_is_unicast(ipa_to_ip4(s->daddr)) && (s->ttl == 1))
+ flags = MSG_DONTROUTE;
+#endif
+
+#ifdef CONFIG_USE_HDRINCL
+ byte hdr[20];
+ struct iovec iov2[2] = { {hdr, 20}, iov };
+
+ if (s->flags & SKF_HDRINCL)
+ {
+ sk_prepare_ip_header(s, hdr, iov.iov_len);
+ msg.msg_iov = iov2;
+ msg.msg_iovlen = 2;
+ }
+#endif
+
+ if (s->flags & SKF_PKTINFO)
+ sk_prepare_cmsgs(s, &msg, cmsg_buf, sizeof(cmsg_buf));
+
+ return sendmsg(s->fd, &msg, flags);
+}
+
+static inline int
+sk_recvmsg(sock *s)
+{
+ struct iovec iov = {s->rbuf, s->rbsize};
+ byte cmsg_buf[CMSG_RX_SPACE];
+ sockaddr src;
+
+ struct msghdr msg = {
+ .msg_name = &src.sa,
+ .msg_namelen = sizeof(src), // XXXX ??
+ .msg_iov = &iov,
+ .msg_iovlen = 1,
+ .msg_control = cmsg_buf,
+ .msg_controllen = sizeof(cmsg_buf),
+ .msg_flags = 0
+ };
+
+ int rv = recvmsg(s->fd, &msg, 0);
+ if (rv < 0)
+ return rv;
+
+ //ifdef IPV4
+ // if (cf_type == SK_IP)
+ // rv = ipv4_skip_header(pbuf, rv);
+ //endif
+
+ sockaddr_read(&src, s->af, &s->faddr, NULL, &s->fport);
+ sk_process_cmsgs(s, &msg);
+
+ if (msg.msg_flags & MSG_TRUNC)
+ s->flags |= SKF_TRUNCATED;
+ else
+ s->flags &= ~SKF_TRUNCATED;
+
+ return rv;
+}
+
+
+static inline void reset_tx_buffer(sock *s) { s->ttx = s->tpos = s->tbuf; }
+
+bool
+sk_tx_pending(sock *s)
+{
+ return s->ttx != s->tpos;
+}
+
+
+static int
+sk_maybe_write(sock *s)
+{
+ int e;
+
+ switch (s->type)
+ {
+ case SK_TCP:
+ case SK_MAGIC:
+ case SK_UNIX:
+ while (sk_tx_pending(s))
+ {
+ e = write(s->fd, s->ttx, s->tpos - s->ttx);
+
+ if (e < 0)
+ {
+ if (errno != EINTR && errno != EAGAIN)
+ {
+ reset_tx_buffer(s);
+ /* EPIPE is just a connection close notification during TX */
+ sk_err_hook(s, (errno != EPIPE) ? errno : 0);
+ return -1;
+ }
+ return 0;
+ }
+ s->ttx += e;
+ }
+ reset_tx_buffer(s);
+ return 1;
+
+#ifdef HAVE_LIBSSH
+ case SK_SSH:
+ while (sk_tx_pending(s))
+ {
+ e = ssh_channel_write(s->ssh->channel, s->ttx, s->tpos - s->ttx);
+
+ if (e < 0)
+ {
+ s->err = ssh_get_error(s->ssh->session);
+ sk_err_hook(s, ssh_get_error_code(s->ssh->session));
+
+ reset_tx_buffer(s);
+ /* EPIPE is just a connection close notification during TX */
+ sk_err_hook(s, (errno != EPIPE) ? errno : 0);
+ return -1;
+ }
+ s->ttx += e;
+ }
+ reset_tx_buffer(s);
+ return 1;
+#endif
+
+ case SK_UDP:
+ case SK_IP:
+ {
+ if (s->tbuf == s->tpos)
+ return 1;
+
+ e = sk_sendmsg(s);
+
+ if (e < 0)
+ {
+ if (errno != EINTR && errno != EAGAIN)
+ {
+ reset_tx_buffer(s);
+ sk_err_hook(s, errno);
+ return -1;
+ }
+
+ if (!s->tx_hook)
+ reset_tx_buffer(s);
+ return 0;
+ }
+ reset_tx_buffer(s);
+ return 1;
+ }
+
+ default:
+ bug("sk_maybe_write: unknown socket type %d", s->type);
+ }
+}
+
+int
+sk_rx_ready(sock *s)
+{
+ int rv;
+ struct pollfd pfd = { .fd = s->fd };
+ pfd.events |= POLLIN;
+
+ redo:
+ rv = poll(&pfd, 1, 0);
+
+ if ((rv < 0) && (errno == EINTR || errno == EAGAIN))
+ goto redo;
+
+ return rv;
+}
+
+/**
+ * sk_send - send data to a socket
+ * @s: socket
+ * @len: number of bytes to send
+ *
+ * This function sends @len bytes of data prepared in the
+ * transmit buffer of the socket @s to the network connection.
+ * If the packet can be sent immediately, it does so and returns
+ * 1, else it queues the packet for later processing, returns 0
+ * and calls the @tx_hook of the socket when the transmission
+ * takes place.
+ */
+int
+sk_send(sock *s, unsigned len)
+{
+ s->ttx = s->tbuf;
+ s->tpos = s->tbuf + len;
+
+ int e = sk_maybe_write(s);
+ if (e == 0) /* Trigger thread poll reload to poll this socket's write. */
+ socket_changed(s);
+
+ return e;
+}
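+
+/* Typical TX pattern under the semantics described above (sketch only, with
+ * pkt and len as placeholders): write the packet into s->tbuf, hand the
+ * length to sk_send(), and on 0 wait for the tx_hook before sending more.
+ *
+ *   memcpy(s->tbuf, pkt, len);
+ *   int e = sk_send(s, len);
+ *   if (e < 0)
+ *     return;    (error, err_hook has already been called)
+ *   if (e == 0)
+ *     wait;      (queued; tx_hook will fire when the buffer drains)
+ */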
+
+/**
+ * sk_send_to - send data to a specific destination
+ * @s: socket
+ * @len: number of bytes to send
+ * @addr: IP address to send the packet to
+ * @port: port to send the packet to
+ *
+ * This is a sk_send() replacement for connection-less packet sockets
+ * which allows the destination of the packet to be chosen dynamically.
+ * Raw IP sockets should use 0 for @port.
+ */
+int
+sk_send_to(sock *s, unsigned len, ip_addr addr, unsigned port)
+{
+ s->daddr = addr;
+ if (port)
+ s->dport = port;
+
+ s->ttx = s->tbuf;
+ s->tpos = s->tbuf + len;
+ return sk_maybe_write(s);
+}
+
+/*
+int
+sk_send_full(sock *s, unsigned len, struct iface *ifa,
+ ip_addr saddr, ip_addr daddr, unsigned dport)
+{
+ s->iface = ifa;
+ s->saddr = saddr;
+ s->daddr = daddr;
+ s->dport = dport;
+ s->ttx = s->tbuf;
+ s->tpos = s->tbuf + len;
+ return sk_maybe_write(s);
+}
+*/
+
+static void
+call_rx_hook(sock *s, int size)
+{
+ if (s->rx_hook(s, size))
+ {
+ /* We need to be careful since the socket could have been deleted by the hook */
+ if (s->loop->sock_active == s)
+ s->rpos = s->rbuf;
+ }
+}
+
+#ifdef HAVE_LIBSSH
+static int
+sk_read_ssh(sock *s)
+{
+ ssh_channel rchans[2] = { s->ssh->channel, NULL };
+ struct timeval timev = { 1, 0 };
+
+ if (ssh_channel_select(rchans, NULL, NULL, &timev) == SSH_EINTR)
+ return 1; /* Try again */
+
+ if (ssh_channel_is_eof(s->ssh->channel) != 0)
+ {
+ /* The remote side is closing the connection */
+ sk_err_hook(s, 0);
+ return 0;
+ }
+
+ if (rchans[0] == NULL)
+ return 0; /* No data is available on the socket */
+
+ const uint used_bytes = s->rpos - s->rbuf;
+ const int read_bytes = ssh_channel_read_nonblocking(s->ssh->channel, s->rpos, s->rbsize - used_bytes, 0);
+ if (read_bytes > 0)
+ {
+ /* Received data */
+ s->rpos += read_bytes;
+ call_rx_hook(s, used_bytes + read_bytes);
+ return 1;
+ }
+ else if (read_bytes == 0)
+ {
+ if (ssh_channel_is_eof(s->ssh->channel) != 0)
+ {
+ /* The remote side is closing the connection */
+ sk_err_hook(s, 0);
+ }
+ }
+ else
+ {
+ s->err = ssh_get_error(s->ssh->session);
+ sk_err_hook(s, ssh_get_error_code(s->ssh->session));
+ }
+
+ return 0; /* No data is available on the socket */
+}
+#endif
+
+ /* sk_read() and sk_write() are called from BFD's event loop */
+
+static inline int
+sk_read_noflush(sock *s, int revents)
+{
+ switch (s->type)
+ {
+ case SK_TCP_PASSIVE:
+ return sk_passive_connected(s, SK_TCP);
+
+ case SK_UNIX_PASSIVE:
+ return sk_passive_connected(s, SK_UNIX);
+
+ case SK_TCP:
+ case SK_UNIX:
+ {
+ int c = read(s->fd, s->rpos, s->rbuf + s->rbsize - s->rpos);
+
+ if (c < 0)
+ {
+ if (errno != EINTR && errno != EAGAIN)
+ sk_err_hook(s, errno);
+ else if (errno == EAGAIN && !(revents & POLLIN))
+ {
+ log(L_ERR "Got EAGAIN from read when revents=%x (without POLLIN)", revents);
+ sk_err_hook(s, 0);
+ }
+ }
+ else if (!c)
+ sk_err_hook(s, 0);
+ else
+ {
+ s->rpos += c;
+ call_rx_hook(s, s->rpos - s->rbuf);
+ return 1;
+ }
+ return 0;
+ }
+
+#ifdef HAVE_LIBSSH
+ case SK_SSH:
+ return sk_read_ssh(s);
+#endif
+
+ case SK_MAGIC:
+ return s->rx_hook(s, 0);
+
+ default:
+ {
+ int e = sk_recvmsg(s);
+
+ if (e < 0)
+ {
+ if (errno != EINTR && errno != EAGAIN)
+ sk_err_hook(s, errno);
+ return 0;
+ }
+
+ s->rpos = s->rbuf + e;
+ s->rx_hook(s, e);
+ return 1;
+ }
+ }
+}
+
+int
+sk_read(sock *s, int revents)
+{
+ int e = sk_read_noflush(s, revents);
+ tmp_flush();
+ return e;
+}
+
+static inline int
+sk_write_noflush(sock *s)
+{
+ switch (s->type)
+ {
+ case SK_TCP_ACTIVE:
+ {
+ if (sk_connect(s) >= 0 || errno == EISCONN)
+ sk_tcp_connected(s);
+ else if (errno != EINTR && errno != EAGAIN && errno != EINPROGRESS)
+ sk_err_hook(s, errno);
+ return 0;
+ }
+
+#ifdef HAVE_LIBSSH
+ case SK_SSH_ACTIVE:
+ {
+ switch (sk_ssh_connect(s))
+ {
+ case SSH_OK:
+ sk_ssh_connected(s);
+ break;
+
+ case SSH_AGAIN:
+ return 1;
+
+ case SSH_ERROR:
+ s->err = ssh_get_error(s->ssh->session);
+ sk_err_hook(s, ssh_get_error_code(s->ssh->session));
+ break;
+ }
+ return 0;
+ }
+#endif
+
+ default:
+ if (sk_tx_pending(s) && sk_maybe_write(s) > 0)
+ {
+ if (s->tx_hook)
+ s->tx_hook(s);
+ return 1;
+ }
+ return 0;
+ }
+}
+
+int
+sk_write(sock *s)
+{
+ int e = sk_write_noflush(s);
+ tmp_flush();
+ return e;
+}
+
+int sk_is_ipv4(sock *s)
+{ return s->af == AF_INET; }
+
+int sk_is_ipv6(sock *s)
+{ return s->af == AF_INET6; }
+
+void
+sk_err(sock *s, int revents)
+{
+ int se = 0, sse = sizeof(se);
+ if ((s->type != SK_MAGIC) && (revents & POLLERR))
+ if (getsockopt(s->fd, SOL_SOCKET, SO_ERROR, &se, &sse) < 0)
+ {
+ log(L_ERR "IO: Socket error: SO_ERROR: %m");
+ se = 0;
+ }
+
+ sk_err_hook(s, se);
+ tmp_flush();
+}
+
+void
+sk_dump_all(struct dump_request *dreq)
+{
+ node *n;
+ sock *s;
+
+ RDUMP("Open sockets:\n");
+ dreq->indent += 3;
+ WALK_LIST(n, main_birdloop.sock_list)
+ {
+ s = SKIP_BACK(sock, n, n);
+ RDUMP("%p ", s);
+ sk_dump(dreq, &s->r);
+ }
+ dreq->indent -= 3;
+ RDUMP("\n");
+}
--- /dev/null
+/*
+ * BIRD Internet Routing Daemon -- Clock
+ *
+ * (c) 1998--2004 Martin Mares <mj@ucw.cz>
+ * (c) 2004 Ondrej Filip <feela@network.cz>
+ *
+ * Can be freely distributed and used under the terms of the GNU GPL.
+ */
+
+/* Unfortunately, some glibc versions hide parts of the RFC 3542 API
+ if _GNU_SOURCE is not defined. */
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+#include <sys/mman.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/socket.h>
+#include <sys/uio.h>
+#include <sys/un.h>
+#include <poll.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <net/if.h>
+#include <netinet/in.h>
+#include <netinet/tcp.h>
+#include <netinet/udp.h>
+#include <netinet/icmp6.h>
+#include <netdb.h>
+
+#include "nest/bird.h"
+#include "lib/lists.h"
+#include "lib/resource.h"
+#include "lib/socket.h"
+#include "lib/event.h"
+#include "lib/locking.h"
+#include "lib/timer.h"
+#include "lib/string.h"
+#include "nest/iface.h"
+#include "conf/conf.h"
+
+#include "sysdep/unix/unix.h"
+#include "sysdep/unix/io-loop.h"
+
+/* Maximum number of calls of tx handler for one socket in one
+ * poll iteration. Should be small enough to not monopolize CPU by
+ * one protocol instance.
+ */
+#define MAX_STEPS 4
+
+/* Maximum number of calls of rx handler for all sockets in one poll
+ iteration. RX callbacks are often much more costly, so we limit
+ this to keep latencies small. */
+#define MAX_RX_STEPS 4
+
+
+/*
+ * Time clock
+ */
+
+btime boot_time;
+
+
+void
+times_update(void)
+{
+ struct timespec ts;
+ int rv;
+
+ btime old_time = current_time();
+ btime old_real_time = current_real_time();
+
+ rv = clock_gettime(CLOCK_MONOTONIC, &ts);
+ if (rv < 0)
+ die("Monotonic clock is missing");
+
+ if ((ts.tv_sec < 0) || (((u64) ts.tv_sec) > ((u64) 1 << 40)))
+ log(L_WARN "Monotonic clock is crazy");
+
+ btime new_time = ts.tv_sec S + ts.tv_nsec NS;
+
+ if (new_time < old_time)
+ log(L_ERR "Monotonic clock is broken");
+
+ rv = clock_gettime(CLOCK_REALTIME, &ts);
+ if (rv < 0)
+ die("clock_gettime: %m");
+
+ btime new_real_time = ts.tv_sec S + ts.tv_nsec NS;
+
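+ /* Publish the new readings with compare-and-swap; if another thread has
+ updated the clock since we sampled it, keep its value and just note the
+ collision in debug output. */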
+ if (!atomic_compare_exchange_strong_explicit(
+ &last_time,
+ &old_time,
+ new_time,
+ memory_order_acq_rel,
+ memory_order_relaxed))
+ DBG("Time update collision: last_time");
+
+ if (!atomic_compare_exchange_strong_explicit(
+ &real_time,
+ &old_real_time,
+ new_real_time,
+ memory_order_acq_rel,
+ memory_order_relaxed))
+ DBG("Time update collision: real_time");
+}
+
+btime
+current_time_now(void)
+{
+ struct timespec ts;
+ int rv;
+
+ rv = clock_gettime(CLOCK_MONOTONIC, &ts);
+ if (rv < 0)
+ die("clock_gettime: %m");
+
+ return ts.tv_sec S + ts.tv_nsec NS;
+}
/* main.c */
-extern char *bird_name;
+extern const char *bird_name;
extern int parse_and_exit;
void async_config(void);
void async_dump(void);
void cmd_shutdown(void);
void cmd_graceful_restart(void);
void cmd_show_threads(int);
-void bird_thread_commit(struct config *new, struct config *old);
#define UNIX_DEFAULT_CONFIGURE_TIMEOUT 300
void io_init(void);
void io_loop(void);
void io_log_dump(struct dump_request *);
-int sk_open_unix(struct birdsock *s, struct birdloop *, const char *name);
enum rf_mode {
RF_APPEND = 1,
bt_test_id = NULL;
is_terminal = isatty(fileno(stdout));
+ set_daemon_name(argv[0], "birdtest");
+
while ((c = getopt(argc, argv, "lcdftv")) >= 0)
switch (c)
{
*/
int parse_and_exit;
-char *bird_name;
void async_config(void) {}
void async_dump(void) {}
void async_shutdown(void) {}
#include "nest/bird.h"
#include "lib/net.h"
#include "conf/conf.h"
-void sysdep_preconfig(struct config *c UNUSED) {}
+void sysdep_preconfig(struct config *c) {
+ alloc_preconfig(&c->runtime.alloc);
+}
-void bird_thread_commit(struct config *new, struct config *old);
-void sysdep_commit(struct config *new, struct config *old)
+void bird_thread_commit(struct thread_config *new);
+void sysdep_commit(struct config *new, struct config *old UNUSED)
{
- bird_thread_commit(new, old);
+ bird_thread_commit(&new->threads);
}
void sysdep_shutdown_done(void) {}